def __or__(self, other):

    if self is other:
        return self

    try:
        ag = other.getAtomGroup()
    except AttributeError:
        raise TypeError('other must be an AtomPointer')

    if self._ag != ag:
        raise ValueError('both selections must be from the same AtomGroup')

    acsi = self.getACSIndex()
    if acsi != other.getACSIndex():
        LOGGER.warn('Active coordinate set indices do not match, it will '
                    'be set to zero.')
        acsi = 0

    indices = unique(concatenate((self._getIndices(),
                                  other._getIndices())))
    if indices[-1] == atommap.DUMMY:
        indices = indices[:-1]
    return Selection(self._ag, indices,
                     '({0}) or ({1})'.format(self.getSelstr(),
                                             other.getSelstr()),
                     acsi, unique=True)
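
# --- usage sketch (illustrative; not part of the library) -----------------
# `|` builds the union of two selections from the same AtomGroup.  Assumes
# ProDy is installed; '1ubi' is just an example identifier.
def _example_selection_union():
    from prody import parsePDB
    prot = parsePDB('1ubi')
    merged = prot.select('backbone') | prot.select('water')
    print(merged.getSelstr())   # "(backbone) or (water)"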
def _superpose(self, **kwargs):
    """Superpose conformations and update coordinates."""

    calcT = getTransformation
    if kwargs.get('trans', False):
        if self._trans is not None:
            LOGGER.info('Existing transformations will be overwritten.')
        trans = np.zeros((self._n_csets, 4, 4))
    else:
        trans = None

    indices = self._indices
    if indices is None:
        weights = self._weights
        coords = self._coords
        confs = self._confs
        confs_selected = self._confs
    else:
        weights = self._weights[:, indices]
        coords = self._coords[indices]
        confs = self._confs
        confs_selected = self._confs[:, indices]

    for i, conf in enumerate(confs_selected):
        rmat, tvec = calcT(conf, coords, weights[i])
        if trans is not None:
            trans[i][:3, :3] = rmat
            trans[i][:3, 3] = tvec
        confs[i] = tvec + np.dot(confs[i], rmat.T)
    self._trans = trans
def wwPDBServer(*key):
    """Set/get `wwPDB`_ FTP/HTTP server location used for downloading PDB
    structures.  Use one of the following keywords for setting a server:

    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+

    .. _wwPDB: http://www.wwpdb.org/"""

    if not key:
        return SETTINGS.get('wwpdb', None)
    elif len(key) == 1:
        try:
            key = key[0].lower()
        except AttributeError:
            raise TypeError('key must be a string')
        if key in WWPDB_FTP_SERVERS:
            SETTINGS['wwpdb'] = key
            SETTINGS.save()
            LOGGER.info('wwPDB server is set to {}.'
                        .format(WWPDB_FTP_SERVERS[key][0]))
        else:
            raise ValueError('{0} is not a valid wwPDB server identifier'
                             .format(repr(key)))
    else:
        raise TypeError('one wwPDB server identifier is expected, {0} given'
                        .format(len(key)))
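
# --- usage sketch (illustrative; not part of the library) -----------------
# Keys are case insensitive; the choice persists via SETTINGS.save().
def _example_wwPDBServer():
    wwPDBServer('PDBe')      # switch downloads to the European server
    print(wwPDBServer())     # -> 'pdbe'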
def alignCoordsets(atoms, weights=None):
    """Returns *atoms* after superposing coordinate sets onto its active
    coordinate set.  Transformations will be calculated for *atoms* and
    applied to its :class:`.AtomGroup`, when applicable.  Optionally,
    atomic *weights* can be passed for weighted superposition."""

    try:
        acsi, n_csets = atoms.getACSIndex(), atoms.numCoordsets()
    except AttributeError:
        raise TypeError('atoms must have type Atomic, not {0}'
                        .format(type(atoms)))
    if n_csets < 2:
        LOGGER.warning('{0} contains fewer than two coordinate sets, '
                       'alignment was not performed.'.format(str(atoms)))
        return

    try:
        ag = atoms.getAtomGroup()
    except AttributeError:
        ag = atoms
    agacsi = ag.getACSIndex()

    tar = atoms._getCoords()
    for i in range(n_csets):
        if i == acsi:
            continue
        atoms.setACSIndex(i)
        ag.setACSIndex(i)
        calcTransformation(atoms, tar, weights).apply(ag)
    atoms.setACSIndex(acsi)
    ag.setACSIndex(agacsi)
    return atoms
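
# --- usage sketch (illustrative; not part of the library) -----------------
# Superpose all models of an NMR structure onto the active coordinate set,
# computing transformations on C-alpha atoms but applying them to the whole
# AtomGroup.  Assumes ProDy is installed; '2k39' is an example NMR entry.
def _example_alignCoordsets():
    from prody import parsePDB
    ubi = parsePDB('2k39')          # multi-model NMR ensemble
    alignCoordsets(ubi.calpha)      # fit on CA, applied to all atoms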
def getCoordsets(self, indices=None):
    """Returns coordinate sets at given *indices*. *indices* may be an
    integer, a list of integers, or **None**. **None** returns all
    coordinate sets."""

    if self._closed:
        raise ValueError('I/O operation on closed file')
    if (self._indices is None and
            (indices is None or indices == slice(None))):
        nfi = self._nfi
        self.reset()
        n_floats = self._n_floats + self._unitcell * 14
        n_atoms = self._n_atoms
        n_csets = self._n_csets
        data = self._file.read(self._itemsize * n_floats * n_csets)
        data = fromstring(data, self._dtype)
        if len(data) < n_floats * n_csets:
            # fewer floats than expected: the file is truncated, so keep
            # only the complete frames
            n_csets = len(data) // n_floats
            data = data[:n_csets * n_floats]
            LOGGER.warning('DCD is corrupt, {0} out of {1} frames '
                           'were parsed.'.format(n_csets, self._n_csets))
        data = data.reshape((n_csets, n_floats))
        if self._unitcell:
            data = data[:, 14:]
        data = data.reshape((n_csets, 3, n_atoms + 2))
        data = data[:, :, 1:-1]
        data = data.transpose(0, 2, 1)
        self.goto(nfi)
        if self._astype is not None and self._astype != data.dtype:
            data = data.astype(self._astype)
        return data
    else:
        return TrajFile.getCoordsets(self, indices)
def loadAtoms(filename):
    """Returns :class:`.AtomGroup` instance loaded from *filename* using
    :func:`numpy.load` function.  See also :func:`saveAtoms`."""

    LOGGER.timeit('_prody_loadatoms')
    attr_dict = load(filename)
    files = set(attr_dict.files)

    if not 'n_atoms' in files:
        raise ValueError('{0} is not a valid atomic data file'
                         .format(repr(filename)))
    title = str(attr_dict['title'])

    # create the group up front so that files without coordinates load too
    ag = AtomGroup(title)
    if 'coordinates' in files:
        ag._n_csets = int(attr_dict['n_csets'])
        ag._coords = attr_dict['coordinates']
    ag._n_atoms = int(attr_dict['n_atoms'])
    ag._setTimeStamp()
    if 'flagsts' in files:
        ag._flagsts = int(attr_dict['flagsts'])

    if 'bonds' in files and 'bmap' in files and 'numbonds' in files:
        ag._bonds = attr_dict['bonds']
        ag._bmap = attr_dict['bmap']
        ag._data['numbonds'] = attr_dict['numbonds']

    skip_flags = set()
    for label, data in attr_dict.items():
        if label in SKIPLOAD:
            continue
        if data.ndim == 1 and data.dtype == bool:
            if label in skip_flags:
                continue
            else:
                ag._setFlags(label, data)
                skip_flags.update(flags.ALIASES.get(label, [label]))
        else:
            ag.setData(label, data)

    for label in ['segindex', 'chindex', 'resindex']:
        if label in attr_dict:
            ag._data[label] = attr_dict[label]

    if ag.numCoordsets() > 0:
        ag._acsi = 0

    if 'cslabels' in files:
        ag.setCSLabels(list(attr_dict['cslabels']))

    LOGGER.report('Atom group was loaded in %.2fs.', '_prody_loadatoms')
    return ag
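
# --- usage sketch (illustrative; not part of the library) -----------------
# Round-trip an AtomGroup through saveAtoms/loadAtoms; assumes the companion
# saveAtoms function writes the .ag.npz file read back here.
def _example_loadAtoms():
    from prody import parsePDB, saveAtoms
    ag = parsePDB('1ubi')
    filename = saveAtoms(ag, 'ubi')    # writes 'ubi.ag.npz'
    ag2 = loadAtoms(filename)
    assert ag2.numAtoms() == ag.numAtoms()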
def getDeviations(self):
    """Returns deviations from reference coordinates for selected atoms.
    Conformations can be aligned using one of :meth:`superpose` or
    :meth:`iterpose` methods prior to calculating deviations."""

    if not isinstance(self._confs, ndarray):
        LOGGER.warning('Conformations are not set.')
        return None
    if not isinstance(self._coords, ndarray):
        LOGGER.warning('Coordinates are not set.')
        return None

    return self._getCoordsets() - self._getCoords()
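
# --- usage sketch (illustrative; not part of the library) -----------------
# Deviations are per-atom displacement vectors; reducing them by hand
# reproduces the (unweighted) per-conformation RMSD.
def _example_getDeviations(ensemble):
    import numpy as np
    dev = ensemble.getDeviations()            # (n_confs, n_atoms, 3)
    rmsd = np.sqrt((dev ** 2).sum(-1).mean(-1))
    return rmsd                               # one RMSD per conformation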
def superpose(self, **kwargs):
    """Superpose the ensemble onto the reference coordinates obtained by
    :meth:`getCoords`."""

    trans = kwargs.pop('trans', True)
    if self._coords is None:
        raise ValueError('coordinates are not set, use `setCoords`')
    if self._confs is None or len(self._confs) == 0:
        raise ValueError('conformations are not set, use `addCoordset`')
    LOGGER.timeit('_prody_ensemble')
    self._superpose(trans=trans)  # the trans kwarg is used by PDBEnsemble
    LOGGER.report('Superposition completed in %.2f seconds.',
                  '_prody_ensemble')
def __add__(self, other):
    """Concatenate ensembles.  The reference coordinates, atoms, and
    weights of *self* are used in the resulting ensemble."""

    if not isinstance(other, Ensemble):
        raise TypeError('an Ensemble instance cannot be added to an {0} '
                        'instance'.format(type(other)))
    elif self._n_atoms != other._n_atoms:
        raise ValueError('Ensembles must have same number of atoms.')

    ensemble = Ensemble('{0} + {1}'.format(self.getTitle(),
                                           other.getTitle()))
    if self._coords is not None:
        ensemble.setCoords(self._coords.copy())
    if self._confs is not None:
        ensemble.addCoordset(self._confs.copy())
    if other._confs is not None:
        ensemble.addCoordset(other._confs.copy())

    all_keys = set(list(self._data.keys()) + list(other._data.keys()))
    for key in all_keys:
        if key in self._data and key in other._data:
            self_data = self._data[key]
            other_data = other._data[key]
        elif key in self._data:
            self_data = self._data[key]
            other_data = zeros(other.numConfs(), dtype=self_data.dtype)
        elif key in other._data:
            other_data = other._data[key]
            # pad *self* with one placeholder per conformation in *self*
            self_data = zeros(self.numConfs(), dtype=other_data.dtype)
        ensemble._data[key] = concatenate((self_data, other_data), axis=0)

    if self._weights is not None:
        LOGGER.info('Atom weights from {0} are used in {1}.'
                    .format(repr(self._title), repr(ensemble.getTitle())))
        ensemble.setWeights(self._weights.copy())
    elif other._weights is not None:
        ensemble.setWeights(other._weights.copy())

    if self._atoms is not None:
        ensemble.setAtoms(self._atoms)
        ensemble._indices = self._indices
    else:
        ensemble.setAtoms(other._atoms)
        ensemble._indices = other._indices

    return ensemble
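
# --- usage sketch (illustrative; not part of the library) -----------------
# Two ensembles over the same atoms are concatenated with `+`; the
# reference coordinates and weights of the left operand win.
def _example_ensemble_add(ens_a, ens_b):
    combined = ens_a + ens_b
    assert combined.numConfs() == ens_a.numConfs() + ens_b.numConfs()
    return combined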
def superpose(self, **kwargs):
    """Superpose the ensemble onto the reference coordinates.

    :arg ref: index of the reference coordinate. If **None**, the average
        coordinate will be assumed as the reference. Default is **None**
    :type ref: int
    """
    ref = kwargs.pop('ref', None)
    if self._coords is None:
        raise ValueError('coordinates are not set, use `setCoords`')
    if self._confs is None or len(self._confs) == 0:
        raise ValueError('conformations are not set, use `addCoordset`')
    LOGGER.timeit('_prody_ensemble')
    self._superpose(ref=ref)  # the ref kwarg is handled by _superpose
    LOGGER.report('Superposition completed in %.2f seconds.',
                  '_prody_ensemble')
def alignAtomicsUsingEnsemble(atomics, ensemble):
    """Align a set of :class:`.Atomic` objects using transformations from
    *ensemble*, which may be a :class:`.PDBEnsemble` or a
    :class:`.PDBConformation` instance.

    Transformations will be applied based on indices so *atomics* and
    *ensemble* must have the same number of members.

    :arg atomics: a set of :class:`.Atomic` objects to be aligned
    :type atomics: tuple, list, :class:`~numpy.ndarray`

    :arg ensemble: a :class:`.PDBEnsemble` or a :class:`.PDBConformation`
        from which transformations can be extracted
    :type ensemble: :class:`.PDBEnsemble`, :class:`.PDBConformation`
    """
    if not isListLike(atomics):
        raise TypeError('atomics must be list-like')

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')

    if isinstance(ensemble, PDBConformation):
        ensemble = [ensemble]

    if len(atomics) != len(ensemble):
        raise ValueError('atomics and ensemble must have the same length')

    output = []
    for i, conf in enumerate(ensemble):
        trans = conf.getTransformation()
        if trans is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')

        ag = atomics[i]
        if not isinstance(ag, Atomic):
            LOGGER.warning('No atomic object found for conformation {0}.'
                           .format(i))
            output.append(None)
            continue

        output.append(trans.apply(ag))

    if len(output) == 1:
        return output[0]
    else:
        return output
def calcTransformation(mobile, target, weights=None):
    """Returns a :class:`Transformation` instance which, when applied to
    the atoms in *mobile*, minimizes the weighted RMSD between *mobile*
    and *target*.  *mobile* and *target* may be NumPy coordinate arrays,
    or :class:`.Atomic` instances, e.g. :class:`.AtomGroup`,
    :class:`.Chain`, or :class:`.Selection`."""

    if not isinstance(mobile, np.ndarray):
        try:
            mob = mobile._getCoords()
        except AttributeError:
            raise TypeError('mobile must be a numpy array or an object '
                            'with getCoords method')
    else:
        mob = mobile
    if not isinstance(target, np.ndarray):
        try:
            tar = target._getCoords()
        except AttributeError:
            raise TypeError('target must be a numpy array or an object '
                            'with getCoords method')
    else:
        tar = target

    if mob.shape != tar.shape:
        raise ValueError('mobile and target coordinate arrays must have '
                         'same number of atoms')

    if mob.shape[1] != 3:
        raise ValueError('mobile and target must be coordinate arrays')

    if weights is None:
        if isinstance(mobile, AtomMap):
            LOGGER.warn('mobile is an AtomMap instance, consider passing '
                        'weights=mobile.getFlags("mapped") if there are '
                        'dummy atoms in mobile')
        if isinstance(target, AtomMap):
            LOGGER.warn('target is an AtomMap instance, consider passing '
                        'weights=target.getFlags("mapped") if there are '
                        'dummy atoms in target')
    else:
        weights = checkWeights(weights, mob.shape[0])

    return Transformation(*getTransformation(mob, tar, weights))
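
# --- usage sketch (illustrative; not part of the library) -----------------
# Compute the optimal superposition of one structure onto another and apply
# it; the file names are placeholders.
def _example_calcTransformation():
    from prody import parsePDB, calcRMSD
    mob = parsePDB('mobile.pdb')
    tar = parsePDB('target.pdb')
    t = calcTransformation(mob, tar)   # least-RMSD rotation + translation
    t.apply(mob)                       # moves mob onto tar in place
    print(calcRMSD(mob, tar))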
def pathPDBFolder(folder=None, divided=False):
    """Returns or specifies the local PDB folder for storing PDB files
    downloaded from `wwPDB <http://www.wwpdb.org/>`_ servers.  Files stored
    in this folder can be accessed via :func:`.fetchPDB` from any working
    directory.  To release the current folder, pass an invalid path, e.g.
    ``folder=''``.

    If *divided* is **True**, the divided folder structure of wwPDB servers
    will be assumed when reading from and writing to the local folder.  For
    example, a structure with identifier **1XYZ** will be present as
    :file:`pdblocalfolder/xy/pdb1xyz.pdb.gz`.

    If *divided* is **False**, a plain folder structure will be expected
    and adopted when saving files.  For example, the same structure will be
    present as :file:`pdblocalfolder/1xyz.pdb.gz`.

    Finally, in either case, lower case letters will be used and compressed
    files will be stored."""

    if folder is None:
        folder = SETTINGS.get('pdb_local_folder')
        if folder:
            if isdir(folder):
                return folder, SETTINGS.get('pdb_local_divided', True)
            else:
                LOGGER.warn('PDB local folder {0} is not accessible.'
                            .format(repr(folder)))
    else:
        if isdir(folder):
            folder = abspath(folder)
            LOGGER.info('Local PDB folder is set: {0}'.format(repr(folder)))
            if divided:
                LOGGER.info('wwPDB divided folder structure will be '
                            'assumed.')
            else:
                LOGGER.info('A plain folder structure will be assumed.')
            SETTINGS['pdb_local_folder'] = folder
            SETTINGS['pdb_local_divided'] = bool(divided)
            SETTINGS.save()
        else:
            current = SETTINGS.pop('pdb_local_folder')
            if current:
                LOGGER.info('PDB folder {0} is released.'
                            .format(repr(current)))
                SETTINGS.pop('pdb_local_divided')
                SETTINGS.save()
            else:
                raise IOError('{0} is not a valid path.'
                              .format(repr(folder)))
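
# --- usage sketch (illustrative; not part of the library) -----------------
# Point ProDy at a local cache folder so fetchPDB reuses downloads; the
# folder name is a placeholder.
def _example_pathPDBFolder():
    import os
    os.makedirs('pdbcache', exist_ok=True)
    pathPDBFolder('pdbcache')      # plain (non-divided) layout
    print(pathPDBFolder())         # -> ('/abs/path/to/pdbcache', False)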
def _iterDonors(self):
    """Yield pairs of indices for donor atoms that are within the pointer.
    Use :meth:`setDonors` for setting donors."""

    if self._ag._donors is None:
        LOGGER.warning('donors are not set, use `AtomGroup.setDonors`')
        return  # nothing to iterate without donor data

    indices = self._getIndices()
    iset = set(indices)
    if len(self._ag) / 2 >= len(self):
        for a, b in self._ag._iterDonors():
            if a in iset and b in iset:
                yield a, b
    else:
        for a, dmap in zip(indices, self._ag._domap[indices]):
            for b in dmap:
                if b > -1 and b in iset:
                    yield a, b
            iset.remove(a)
def _iterNBExclusions(self):
    """Yield pairs of indices for excluded non-bonded atom pairs that are
    within the pointer.  Use :meth:`setNBExclusions` for setting
    nbexclusions."""

    if self._ag._nbexclusions is None:
        LOGGER.warning('nbexclusions are not set, use '
                       '`AtomGroup.setNBExclusions`')
        return  # nothing to iterate without exclusion data

    indices = self._getIndices()
    iset = set(indices)
    if len(self._ag) / 2 >= len(self):
        for a, b in self._ag._iterNBExclusions():
            if a in iset and b in iset:
                yield a, b
    else:
        for a, nbemap in zip(indices, self._ag._nbemap[indices]):
            for b in nbemap:
                if b > -1 and b in iset:
                    yield a, b
            iset.remove(a)
def checkIdentifiers(*pdb):
    """Check whether *pdb* identifiers are valid, replacing invalid ones
    with **None** in the list that is returned."""

    identifiers = []
    append = identifiers.append
    for pid in pdb:
        try:
            pid = pid.strip().lower()
        except AttributeError:
            LOGGER.warn('{0} is not a valid identifier.'.format(repr(pid)))
            append(None)
        else:
            if not (len(pid) == 4 and pid.isalnum()):
                LOGGER.warn('{0} is not a valid identifier.'
                            .format(repr(pid)))
                append(None)
            else:
                append(pid)
    return identifiers
def pathPDBMirror(path=None, format=None):
    """Returns or specifies the PDB mirror path to be used by
    :func:`.fetchPDB`.  To release the current mirror, pass an invalid
    path, e.g. ``path=''``.

    If you are keeping a partial mirror, such as PDB files in the
    :file:`/data/structures/divided/pdb/` folder, specify *format*, which
    is ``'pdb'`` in this case."""

    if path is None:
        path = SETTINGS.get('pdb_mirror_path')
        format = SETTINGS.get('pdb_mirror_format', None)
        if path:
            if isdir(path):
                if format is None:
                    return path
                else:
                    return path, format
            else:
                LOGGER.warning('PDB mirror path {0} is not accessible.'
                               .format(repr(path)))
    else:
        if isdir(path):
            path = abspath(path)
            LOGGER.info('Local PDB mirror path is set: {0}'
                        .format(repr(path)))
            SETTINGS['pdb_mirror_path'] = path
            SETTINGS['pdb_mirror_format'] = format
            SETTINGS.save()
        else:
            current = SETTINGS.pop('pdb_mirror_path')
            if current:
                LOGGER.info('PDB mirror {0} is released.'
                            .format(repr(current)))
                SETTINGS.save()
            else:
                raise IOError('{0} is not a valid path.'
                              .format(repr(path)))
def getCoordsets(self, indices=None):
    # Read the requested frames, restoring the file position afterwards.
    if self._closed:
        raise ValueError('I/O operation on closed file')
    if indices is None:
        indices = np.arange(self._n_csets)
    elif isinstance(indices, int):
        indices = np.array([indices])
    elif isinstance(indices, slice):
        indices = np.arange(*indices.indices(self._n_csets))
        indices.sort()
    elif isinstance(indices, (list, np.ndarray)):
        indices = np.unique(indices)
    else:
        raise TypeError('indices must be an integer or a list of integers')

    nfi = self._nfi
    self.reset()

    n_atoms = self.numSelected()
    coords = np.zeros((len(indices), n_atoms, 3), self._dtype)

    prev = -1  # index of the last frame read; reset() leaves us at frame 0
    next_frame = self.nextCoordset
    for i, index in enumerate(indices):
        diff = index - prev
        if diff > 1:
            self.skip(diff - 1)
        xyz = next_frame()
        if xyz is None:
            LOGGER.warning('Expected {0} frames, but parsed {1}.'
                           .format(len(indices), i))
            self.goto(nfi)
            return coords[:i]
        coords[i] = xyz
        prev = index

    self.goto(nfi)
    return coords
def __add__(self, other):
    """Returns an :class:`.AtomMap` instance.  The order of pointed atoms
    is preserved."""

    try:
        ag = other.getAtomGroup()
    except AttributeError:
        raise TypeError('unsupported operand type(s) for +: {0} and '
                        '{1}'.format(repr(type(self).__name__),
                                     repr(type(other).__name__)))

    if ag != self._ag:
        raise ValueError('AtomPointer instances must point to the same '
                         'AtomGroup instance')

    acsi = self.getACSIndex()
    if acsi != other.getACSIndex():
        LOGGER.warning('Active coordset indices of atoms are not the same. '
                       'Result will have ACSI {0}.'.format(acsi))

    title = '({0}) + ({1})'.format(str(self), str(other))
    indices = concatenate([self._getIndices(), other._getIndices()])

    dummies = 0
    try:
        dummies += self.numDummies()
    except AttributeError:
        pass
    try:
        dummies += other.numDummies()
    except AttributeError:
        pass

    return AtomMap(ag, indices, acsi, title=title, intarrays=True,
                   dummies=dummies)
def addNonstdAminoacid(resname, *properties):
    """Add non-standard amino acid *resname* with *properties* selected
    from:

      * {props}

    .. ipython:: python

       addNonstdAminoacid('PTR', 'acidic', 'aromatic', 'cyclic', 'large',
           'polar', 'surface')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')"""

    resname = str(resname)
    if len(resname) > 4:
        LOGGER.warn('Residue name {0} is unusually long.'
                    .format(repr(resname)))
    propset = set(properties)
    for cat, val in CATEGORIES.items():
        intersection = val.intersection(propset)
        if intersection:
            if len(intersection) > 1:
                raise ValueError('amino acid properties {0} cannot be '
                                 'present together'.format(', '.join(
                                     [repr(prp) for prp in intersection])))
            for prop in intersection:
                propset.remove(prop)
    if propset:
        raise ValueError('amino acid property {0} is not valid'
                         .format(repr(propset.pop())))

    nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
    nonstd[resname] = set(properties)
    updateNonstandard(nonstd)
def iterpose(self, rmsd=0.0001):
    """Iteratively superpose the ensemble until convergence.  Initially,
    all conformations are aligned with the reference coordinates.  Then
    mean coordinates are calculated, and are set as the new reference
    coordinates.  This is repeated until reference coordinates do not
    change.  This is determined by the value of RMSD between the new and
    old reference coordinates.  Note that at the end of the iterative
    procedure the reference coordinate set will be the average of the
    conformations in the ensemble.

    :arg rmsd: change in reference coordinates to determine convergence,
        default is 0.0001 Å RMSD
    :type rmsd: float"""

    if self._coords is None:
        raise AttributeError('coordinates are not set, use `setCoords`')
    if self._confs is None or len(self._confs) == 0:
        raise AttributeError('conformations are not set, use '
                             '`addCoordset`')
    LOGGER.info('Starting iterative superposition:')
    LOGGER.timeit('_prody_ensemble')
    rmsdif = 1
    step = 0
    weights = self._weights
    length = len(self)
    if weights is not None:
        if weights.ndim == 3:
            weightsum = weights.sum(axis=0)
            weightsum[weightsum == 0.] = 1.  # add pseudocount to avoid nan
        else:
            weightsum = length

    while rmsdif > rmsd:
        self._superpose()
        if weights is None:
            newxyz = self._confs.sum(0) / length
        else:
            newxyz = (self._confs * weights).sum(0) / weightsum
        rmsdif = getRMSD(self._coords, newxyz)
        self._coords = newxyz
        step += 1
        LOGGER.info('Step #{0}: RMSD difference = {1:.4e}'
                    .format(step, rmsdif))
    LOGGER.report('Iterative superposition completed in %.2fs.',
                  '_prody_ensemble')
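
# --- usage sketch (illustrative; not part of the library) -----------------
# After iterpose converges, the reference equals the ensemble average.
def _example_iterpose(ensemble):
    ensemble.iterpose(rmsd=1e-4)       # tolerance in Angstroms
    return ensemble.getCoords()        # the converged mean structure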
def parseDCD(filename, start=None, stop=None, step=None, astype=None):
    """Parse CHARMM format DCD files (also NAMD 2.1 and later).  Returns
    an :class:`Ensemble` instance.  Conformations in the ensemble will be
    ordered as they appear in the trajectory file.  Use the
    :class:`DCDFile` class for parsing coordinates of a subset of atoms.

    :arg filename: DCD filename
    :type filename: str

    :arg start: index of first frame to read
    :type start: int

    :arg stop: index of the frame that stops reading
    :type stop: int

    :arg step: steps between reading frames, default is 1 meaning every
        frame
    :type step: int

    :arg astype: cast coordinate array to specified type
    :type astype: type"""

    dcd = DCDFile(filename, astype=astype)
    time_ = time()
    n_frames = dcd.numFrames()
    LOGGER.info('DCD file contains {0} coordinate sets for {1} atoms.'
                .format(n_frames, dcd.numAtoms()))
    ensemble = dcd[slice(start, stop, step)]
    # compute the size before closing the file handle
    dcd_size = 1.0 * dcd.numFrames() * dcd._bytes_per_frame / (1024 * 1024)
    dcd.close()
    time_ = time() - time_ or 0.01
    LOGGER.info('DCD file was parsed in {0:.2f} seconds.'.format(time_))
    LOGGER.info('{0:.2f} MB parsed at input rate {1:.2f} MB/s.'
                .format(dcd_size, dcd_size / time_))
    LOGGER.info('{0} coordinate sets parsed at input rate {1} frame/s.'
                .format(n_frames, int(n_frames / time_)))
    return ensemble
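
# --- usage sketch (illustrative; not part of the library) -----------------
# Read every other frame of a trajectory into an Ensemble; 'traj.dcd' is a
# placeholder file name.
def _example_parseDCD():
    ens = parseDCD('traj.dcd', step=2)
    print(ens.numConfs(), ens.numAtoms())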
def calcMSF(coordsets):
    """Calculate mean square fluctuation(s) (MSF)."""

    try:
        ncsets = coordsets.numFrames()
    except AttributeError:
        try:
            coordsets = coordsets.getCoordsets()
        except AttributeError:
            pass
        try:
            ndim, shape = coordsets.ndim, coordsets.shape
        except:
            raise TypeError('coordsets must be a Numpy array or a ProDy '
                            'object with `getCoordsets` method')
        if ndim != 3 or shape[0] == 1:
            raise ValueError('coordsets must contain multiple sets')
        msf = var(coordsets, 0).sum(1)
    else:
        nfi = coordsets.nextIndex()
        natoms = coordsets.numSelected()
        total = zeros((natoms, 3))
        sqsum = zeros((natoms, 3))

        LOGGER.progress('Evaluating {0} frames from {1}:'
                        .format(ncsets, str(coordsets)), ncsets,
                        '_prody_calcMSF')
        ncsets = 0
        coordsets.reset()
        for frame in coordsets:
            frame.superpose()
            coords = frame._getCoords()
            total += coords
            sqsum += coords ** 2
            ncsets += 1
            LOGGER.update(ncsets, label='_prody_calcMSF')
        LOGGER.finish()
        msf = (sqsum / ncsets - (total / ncsets) ** 2).sum(1)
        coordsets.goto(nfi)
    return msf
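
# --- usage sketch (illustrative; not part of the library) -----------------
# For an ndarray input the models are assumed to be superposed already.
# MSF maps to pseudo B-factors via B = (8*pi**2/3) * MSF.
def _example_calcMSF():
    import numpy as np
    from prody import parsePDB
    ens = parsePDB('2k39', subset='ca')     # example NMR ensemble
    msf = calcMSF(ens.getCoordsets())
    return 8 * np.pi ** 2 / 3 * msf         # pseudo B-factors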
def run(self, tmax=200, li=0.2, lf=0.01, ei=0.3, ef=0.05,
        Ti=0.1, Tf=2, c=0, calcC=False):
    LOGGER.info('Building coordinates from electron density map. '
                'This may take a while.')
    LOGGER.timeit('_prody_make_nodes')
    tmax = int(tmax * self.N)
    li = li * self.N
    if calcC:
        Ti = Ti * self.N
        Tf = Tf * self.N
    for t in range(1, tmax + 1):
        # calculate the annealing parameters for this step
        tt = float(t) / tmax
        l = li * np.power(lf / li, tt)
        ep = ei * np.power(ef / ei, tt)
        if calcC:
            T = Ti * np.power(Tf / Ti, tt)
        else:
            T = -1
        self.runOnce(t, l, ep, T, c)
    LOGGER.report('{0} pseudoatoms were fitted in %.2fs.'.format(self.N),
                  '_prody_make_nodes')
    return
def fetchPDBviaHTTP(*pdb, **kwargs):
    """Retrieve PDB file(s) for specified *pdb* identifier(s) and return
    path(s).  Downloaded files will be stored in the local PDB folder, if
    one is set using :meth:`.pathPDBFolder`, and copied into *folder*, if
    specified by the user.  If no destination folder is specified, files
    will be saved in the current working directory.  If *compressed* is
    **False**, decompressed files will be copied into *folder*."""

    if kwargs.get('check', True):
        identifiers = checkIdentifiers(*pdb)
    else:
        identifiers = list(pdb)

    output_folder = kwargs.pop('folder', None)
    compressed = bool(kwargs.pop('compressed', True))

    extension = '.pdb'
    local_folder = pathPDBFolder()
    if local_folder:
        local_folder, is_divided = local_folder
        if is_divided:
            getPath = lambda pdb: join(makePath(join(local_folder,
                                                     pdb[1:3])),
                                       'pdb' + pdb + '.pdb.gz')
        else:
            getPath = lambda pdb: join(local_folder, pdb + '.pdb.gz')
        if output_folder is None:
            second = lambda filename, pdb: filename
        else:
            if compressed:
                second = lambda filename, pdb: (
                    copyFile(filename,
                             join(output_folder, pdb + extension + '.gz')))
            else:
                second = lambda filename, pdb: gunzip(
                    filename, join(output_folder, pdb + extension))
    else:
        if output_folder is None:
            output_folder = getcwd()
        if compressed:
            getPath = lambda pdb: join(output_folder,
                                       pdb + extension + '.gz')
            second = lambda filename, pdb: filename
        else:
            getPath = lambda pdb: join(output_folder, pdb + extension)
            second = lambda filename, pdb: gunzip(getPath(pdb),
                                                  getPath(pdb))

    getURL = WWPDB_HTTP_URL[wwPDBServer() or 'us']

    success = 0
    failure = 0
    filenames = []
    for pdb in identifiers:
        if pdb is None:
            filenames.append(None)
            continue
        try:
            handle = openURL(getURL(pdb))
        except Exception as err:
            LOGGER.warn('{0} download failed ({1}).'.format(pdb, str(err)))
            failure += 1
            filenames.append(None)
        else:
            data = handle.read()
            if len(data):
                filename = getPath(pdb)

                with open(filename, 'w+b') as pdbfile:
                    pdbfile.write(data)

                filename = normpath(relpath(second(filename, pdb)))
                LOGGER.debug('{0} downloaded ({1})'
                             .format(pdb, sympath(filename)))
                success += 1
                filenames.append(filename)
            else:
                LOGGER.warn('{0} download failed, reason unknown.'
                            .format(pdb))
                failure += 1
                filenames.append(None)

    LOGGER.debug('PDB download via HTTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
    if len(identifiers) == 1:
        return filenames[0]
    else:
        return filenames
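
# --- usage sketch (illustrative; not part of the library) -----------------
# Fetch two example entries over HTTP, decompressed into the current folder.
def _example_fetchPDBviaHTTP():
    paths = fetchPDBviaHTTP('1ubi', '2k39', folder='.', compressed=False)
    print(paths)   # one path per identifier; None marks a failed download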
def fetchPDBviaFTP(*pdb, **kwargs):
    """Retrieve PDB (default), PDBML, mmCIF, or EMD file(s) for specified
    *pdb* identifier(s) and return path(s).  Downloaded files will be
    stored in the local PDB folder, if one is set using
    :meth:`.pathPDBFolder`, and copied into *folder*, if specified by the
    user.  If no destination folder is specified, files will be saved in
    the current working directory.  If *compressed* is **False**,
    decompressed files will be copied into *folder*.  The *format* keyword
    argument can be used to retrieve `PDBML <http://pdbml.pdb.org/>`_,
    `mmCIF <http://mmcif.pdb.org/>`_ and `EMD map
    <ftp://ftp.wwpdb.org/pub/emdb/doc/Map-format/current/EMDB_map_format.pdf>`_
    files: ``format='cif'`` will fetch an mmCIF file, ``format='emd'``
    will fetch an EMD file, and ``format='xml'`` will fetch a PDBML file.
    If a PDBML header file is desired, ``noatom=True`` will do the job."""

    if kwargs.get('check', True):
        identifiers = checkIdentifiers(*pdb)
    else:
        identifiers = list(pdb)

    output_folder = kwargs.pop('folder', None)
    compressed = bool(kwargs.pop('compressed', True))
    format = str(kwargs.pop('format', 'pdb')).lower()
    noatom = bool(kwargs.pop('noatom', False))

    if format == 'pdb':
        ftp_divided = 'pdb/data/structures/divided/pdb'
        ftp_pdbext = '.ent.gz'
        ftp_prefix = 'pdb'
        extension = '.pdb'
    elif format == 'xml':
        if noatom:
            ftp_divided = 'pdb/data/structures/divided/XML-noatom'
            ftp_pdbext = '-noatom.xml.gz'
            extension = '-noatom.xml'
        else:
            ftp_divided = 'pdb/data/structures/divided/XML'
            ftp_pdbext = '.xml.gz'
            extension = '.xml'
        ftp_prefix = ''
    elif format == 'cif':
        ftp_divided = 'pdb/data/structures/divided/mmCIF'
        ftp_pdbext = '.cif.gz'
        ftp_prefix = ''
        extension = '.cif'
    elif format == 'emd' or format == 'map':
        ftp_divided = 'emdb/structures'
        ftp_pdbext = '.map.gz'
        ftp_prefix = 'emd_'
        extension = '.map'
    else:
        raise ValueError(repr(format) + ' is not a valid format')

    local_folder = pathPDBFolder()
    if format == 'pdb' and local_folder:
        local_folder, is_divided = local_folder
        if is_divided:
            getPath = lambda pdb: join(makePath(join(local_folder,
                                                     pdb[1:3])),
                                       'pdb' + pdb + '.pdb.gz')
        else:
            getPath = lambda pdb: join(local_folder, pdb + '.pdb.gz')
        if output_folder is None:
            second = lambda filename, pdb: filename
        else:
            if compressed:
                second = lambda filename, pdb: (
                    copyFile(filename,
                             join(output_folder, pdb + extension + '.gz')))
            else:
                second = lambda filename, pdb: gunzip(
                    filename, join(output_folder, pdb + extension))
    else:
        if output_folder is None:
            output_folder = getcwd()
        if compressed:
            getPath = lambda pdb: join(output_folder,
                                       pdb + extension + '.gz')
            second = lambda filename, pdb: filename
        else:
            getPath = lambda pdb: join(output_folder, pdb + extension)
            second = lambda filename, pdb: gunzip(getPath(pdb),
                                                  getPath(pdb))

    ftp_name, ftp_host, ftp_path = WWPDB_FTP_SERVERS[wwPDBServer() or 'us']
    LOGGER.debug('Connecting wwPDB FTP server {0}.'.format(ftp_name))

    from ftplib import FTP
    try:
        ftp = FTP(ftp_host)
    except Exception as error:
        raise type(error)('FTP connection problem, potential reason: '
                          'no internet connectivity')
    else:
        success = 0
        failure = 0
        filenames = []
        ftp.login('')
        for pdb in identifiers:
            if pdb is None:
                filenames.append(None)
                continue
            data = []
            ftp_fn = ftp_prefix + pdb + ftp_pdbext
            try:
                ftp.cwd(ftp_path)
                ftp.cwd(ftp_divided)
                if format == 'emd':
                    ftp.cwd('EMD-{0}/map'.format(pdb))
                else:
                    ftp.cwd(pdb[1:3])
                ftp.retrbinary('RETR ' + ftp_fn, data.append)
            except Exception as error:
                if ftp_fn in ftp.nlst():
                    LOGGER.warn('{0} download failed ({1}). It is '
                                'possible that you do not have rights to '
                                'download .gz files in the current '
                                'network.'.format(pdb, str(error)))
                else:
                    LOGGER.info('{0} download failed. {1} does not exist '
                                'on {2}.'.format(ftp_fn, pdb, ftp_host))
                failure += 1
                filenames.append(None)
            else:
                if len(data):
                    filename = getPath(pdb)

                    with open(filename, 'w+b') as pdbfile:
                        write = pdbfile.write
                        for block in data:
                            write(block)

                    filename = normpath(relpath(second(filename, pdb)))
                    LOGGER.debug('{0} downloaded ({1})'
                                 .format(pdb, sympath(filename)))
                    success += 1
                    filenames.append(filename)
                else:
                    LOGGER.warn('{0} download failed, reason unknown.'
                                .format(pdb))
                    failure += 1
                    filenames.append(None)

        ftp.quit()

    LOGGER.debug('PDB download via FTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
    if len(identifiers) == 1:
        return filenames[0]
    else:
        return filenames
def parseImagesFromSTAR(particlesSTAR, **kwargs):
    """Parses particle images using data from a STAR file containing
    information about them.

    :arg particlesSTAR: a filename for a STAR file
    :type particlesSTAR: str

    :arg block_indices: indices for data blocks containing rows
        corresponding to images of interest.  The indexing scheme is
        similar to that for numpy arrays.  Default behavior is to use all
        data blocks about images.
    :type block_indices: list, :class:`~numpy.ndarray`

    :arg row_indices: indices for rows corresponding to images of
        interest.  The indexing scheme is similar to that for numpy
        arrays.  row_indices should be a 1D or 2D array-like; 2D
        row_indices should contain an entry for each relevant loop.  If a
        1D array-like is given, the same row indices will be applied to
        all loops.  Default behavior is to use all rows about images.
    :type row_indices: list, :class:`~numpy.ndarray`

    :arg particle_indices: indices for particles regardless of STAR
        structure.  Default is to take all particles.  Please note: this
        acts after block_indices and row_indices.
    :type particle_indices: list, :class:`~numpy.ndarray`

    :arg saveImageArrays: whether to save the numpy array for each image
        to file.  Default is False.
    :type saveImageArrays: bool

    :arg saveDirectory: directory where numpy image arrays are saved.
        Default is None, which means save to the current working
        directory.
    :type saveDirectory: str, None

    :arg rotateImages: whether to apply in-plane translations and
        rotations using provided psi and origin data.  Default is True.
    :type rotateImages: bool
    """
    try:
        from skimage.transform import rotate
    except ImportError:
        raise ImportError('This function requires scikit-image.')

    block_indices = kwargs.get('block_indices', None)
    # No loop_indices because data blocks about particle images contain
    # exactly one loop
    row_indices = kwargs.get('row_indices', None)
    particle_indices = kwargs.get('particle_indices', None)

    saveImageArrays = kwargs.get('saveImageArrays', False)
    saveDirectory = kwargs.get('saveDirectory', None)
    rotateImages = kwargs.get('rotateImages', True)

    try:
        particlesSTAR = parseSTAR(particlesSTAR)
    except:
        raise ValueError('particlesSTAR should be a filename for a STAR '
                         'file')

    # Check dimensions/contents of particlesSTAR and generate full indices
    dataBlocks = []
    loops = []
    maxLoops = 0
    maxRows = 0
    dataBlock_goodness = []
    for dataBlock in particlesSTAR:
        foundImageField = False
        for loop in dataBlock:
            if ('_image' in loop.fields) or ('_rlnImageName' in loop.fields):
                foundImageField = True
                loops.append(loop)
                if loop.numRows > maxRows:
                    maxRows = loop.numRows
            else:
                dataBlock.pop(int(loop.getTitle().split(' ')[-1]))

        if dataBlock.numLoops > maxLoops:
            maxLoops = dataBlock.numLoops

        if foundImageField:
            dataBlocks.append(dataBlock)
            dataBlock_goodness.append(True)
        else:
            dataBlock_goodness.append(False)

    indices = np.zeros((len(dataBlocks), maxLoops, maxRows, 3), dtype=int)
    i = -1
    for n, dataBlock in enumerate(particlesSTAR):
        if dataBlock_goodness[n]:
            i += 1
            for j, loop in enumerate(dataBlock):
                for k in range(maxRows):
                    if k < loop.numRows:
                        indices[i, j, k] = np.array([n, j, k])
                    else:
                        indices[i, j, k] = np.array([0, 0, 0])

    dataBlocks = np.array(dataBlocks)
    loops = np.array(loops)

    # Convert keyword indices to valid indices if possible
    if block_indices is not None:
        if np.array_equal(dataBlocks, np.array([])):
            raise TypeError('particlesSTAR must have data blocks to use '
                            'block_indices')

        try:
            block_indices = np.array(block_indices)
        except:
            raise TypeError('block_indices should be array-like')

        if block_indices.ndim != 1:
            raise ValueError('block_indices should be a 1-dimensional '
                             'array-like')

        for i, index in enumerate(list(reversed(block_indices))):
            try:
                block = particlesSTAR[index]
                if not isinstance(block, StarDataBlock):
                    LOGGER.warn('There is no block corresponding to '
                                'block_index {0}. This index has been '
                                'removed.'
                                .format(block_indices.shape[0] - i - 1))
                    block_indices = np.delete(block_indices, i, 0)
            except:
                LOGGER.warn('There is no block corresponding to '
                            'block_index {0}. This index has been removed.'
                            .format(block_indices.shape[0] - i - 1))
                block_indices = np.delete(block_indices, i, 0)

        if not np.array_equal(block_indices, np.array([])):
            indices = np.concatenate(
                ([indices[np.where(indices[:, 0, 0, 0] == item)]
                  for item in block_indices]), axis=0)
        else:
            LOGGER.warn('None of the block_indices corresponded to '
                        'dataBlocks. Default block indices corresponding '
                        'to all dataBlocks will be used instead.')

        dataBlocks = particlesSTAR[block_indices]

    if row_indices is not None:
        try:
            row_indices = np.array(row_indices)
        except:
            raise TypeError('row_indices should be array-like')

        if row_indices.ndim == 1:
            if isinstance(row_indices[0], int):
                # row_indices provided was truly 1D so we will use the
                # same row indices for all data blocks and warn the user
                # we are doing so
                if len(dataBlocks) != 1:
                    LOGGER.warn('row_indices is 1D but there are multiple '
                                'data blocks so the same row indices will '
                                'be used for each')
                row_indices = np.array([row_indices
                                        for i in range(len(dataBlocks))])
                # This also works if len(dataBlocks) == 1
            elif isinstance(row_indices[0], (list, tuple)):
                # A list-like of list-likes of different sizes was
                # provided.  We turn it into a proper 2D array by filling
                # the short list-likes with zeros.
                if len(row_indices) != len(dataBlocks):
                    raise ValueError('There should be an entry in row '
                                     'indices for each data block')

                max_len = 0
                for entry in row_indices:
                    if not np.isscalar(entry):
                        if len(entry) > max_len:
                            max_len = len(entry)

                row_indices_list_entries = []
                for entry in row_indices:
                    if isinstance(entry, int):
                        list_entry = [entry]
                    else:
                        list_entry = list(entry)
                    while len(list_entry) < max_len:
                        list_entry.append(0)
                    row_indices_list_entries.append(list_entry)
                row_indices = np.array(row_indices_list_entries)
        elif row_indices.ndim == 2:
            # A list-like of list-likes of the same size was provided
            if row_indices.shape[0] != len(dataBlocks):
                if len(row_indices) == 1:
                    # we will use the same row indices for all data blocks
                    # and warn the user we are doing so
                    if len(dataBlocks) != 1:
                        LOGGER.warn('row_indices has one entry but there '
                                    'are multiple data blocks so the same '
                                    'row indices will be used for each')
                    row_indices = np.array(
                        [row_indices[0] for i in range(len(dataBlocks))])
                    # This also works if len(dataBlocks) == 1
                else:
                    raise ValueError('There should be an entry in row '
                                     'indices for each data block')
        else:
            raise ValueError('row_indices should be 1D or 2D array-like '
                             'objects')

        # indices need updating
        good_indices_list = []
        for i, index_i in enumerate(indices):
            good_indices_list.append([])
            for j, index_j in enumerate(index_i):
                good_indices_list[i].append([])
                for r, index_r in enumerate(row_indices[i]):
                    for k, index_k in enumerate(index_j):
                        if k == index_r:
                            if not (r != 0 and index_r == 0):
                                good_indices_list[i][j].append(index_k)
                            else:
                                good_indices_list[i][j].append(
                                    np.array([0, 0, 0]))
        indices = np.array(good_indices_list)

    if len(indices) == 0:
        raise ValueError('selection does not contain any rows with image '
                         'fields')

    # Use indices to collect particle data dictionaries
    particles = []
    for i, index_i in enumerate(indices):
        for j, index_j in enumerate(index_i):
            for k, index_k in enumerate(index_j):
                if not (np.array_equal(index_k, np.array([0, 0, 0]))
                        and not (i == 0 and j == 0 and k == 0)):
                    particles.append(
                        particlesSTAR[index_k[0]][index_k[1]][index_k[2]])

    if particle_indices is None:
        particle_indices = list(range(len(particles)))

    # Parse images using particle dictionaries
    image_stacks = dict()
    images = []
    parsed_images_data = []
    stk_images = []

    if particlesSTAR._prog == 'XMIPP':
        imageFieldKey = '_image'
    else:
        imageFieldKey = '_rlnImageName'

    for i in particle_indices:
        particle = particles[i]

        try:
            image_field = particle[imageFieldKey]
            image_index = int(image_field.split('@')[0]) - 1
            filename = image_field.split('@')[1]
        except:
            raise ValueError('particlesSTAR does not contain data about '
                             'particle image {0} location in either '
                             'RELION or XMIPP format'.format(i))

        if filename.endswith('.stk'):
            stk_images.append(str(i))
            continue

        if not filename in list(image_stacks.keys()):
            image_stacks[filename] = parseEMD(filename).density

        image = image_stacks[filename][image_index]
        parsed_images_data.append(image_field)

        if saveImageArrays:
            if saveDirectory is not None:
                np.save('{0}/{1}'.format(saveDirectory, i), image)
            else:
                np.save('{0}'.format(i), image)

        if rotateImages:
            if particlesSTAR._prog == 'RELION':
                anglePsi = float(particle['_rlnAnglePsi'])
                originX = float(particle['_rlnOriginX'])
                originY = float(particle['_rlnOriginY'])
            elif particlesSTAR._prog == 'XMIPP':
                anglePsi = float(particle['_anglePsi'])
                originX = float(particle['_shiftX'])
                originY = float(particle['_shiftY'])
            images.append(rotate(image, anglePsi,
                                 center=(float(image.shape[0]) - originX,
                                         float(image.shape[1]) - originY)))
        else:
            images.append(image)

    if len(stk_images) > 0:
        LOGGER.warn('ProDy currently cannot parse images from XMIPP .stk '
                    'files. Please be aware that images {0} and {1} will '
                    'be missing from the final array.'
                    .format(', '.join(stk_images[:-1]), stk_images[-1]))

    return np.array(images), parsed_images_data
def _superpose(self, **kwargs):
    """Superpose conformations and update coordinates."""

    ref = kwargs.pop('ref', None)
    indices = self._indices
    weights = self._weights
    mobs = self._confs
    if indices is None:
        idx = False
        tar = self._coords
        movs = None
    else:
        idx = True
        if self._weights is not None:
            weights = weights[indices]
        tar = self._coords[indices]
        movs = self._confs

    linalg = importLA()
    svd = linalg.svd
    det = linalg.det

    if weights is None:
        if ref is None:
            tar_com = tar.mean(0)
        else:
            tar_com = tar[ref]
        tar_org = (tar - tar_com)
        mob_org = zeros(tar_org.shape, dtype=mobs.dtype)
        tar_org = tar_org.T
    else:
        weights_sum = weights.sum()
        weights_dot = dot(weights.T, weights)
        if ref is None:
            tar_com = (tar * weights).sum(axis=0) / weights_sum
        else:
            tar_com = (tar[ref] * weights[ref]).sum(axis=0) / sum(
                weights[ref])
        tar_org = (tar - tar_com)
        mob_org = zeros(tar_org.shape, dtype=mobs.dtype)

    LOGGER.progress('Superposing ', len(mobs), '_prody_ensemble')
    for i, mob in enumerate(mobs):
        if idx:
            mob = mob[indices]
        if weights is None:
            mob_com = mob.mean(0)
            matrix = dot(tar_org, subtract(mob, mob_com, mob_org))
        else:
            mob_com = (mob * weights).sum(axis=0) / weights_sum
            subtract(mob, mob_com, mob_org)
            matrix = dot((tar_org * weights).T,
                         (mob_org * weights)) / weights_dot

        U, s, Vh = svd(matrix)
        Id = array([[1, 0, 0],
                    [0, 1, 0],
                    [0, 0, sign(det(matrix))]])
        rotation = dot(Vh.T, dot(Id, U.T))

        if movs is None:
            mobs[i] = dot(mob_org, rotation)
            add(mobs[i], tar_com, mobs[i])
        else:
            add(dot(movs[i], rotation),
                (tar_com - dot(mob_com, rotation)), movs[i])

        LOGGER.update(i + 1, label='_prody_ensemble')

    LOGGER.finish()
def parseMMCIFStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` and/or a :class:`.StarDict`
    containing header data parsed from a stream of CIF lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)"""

    model = kwargs.get('model')
    subset = kwargs.get('subset')
    chain = kwargs.get('chain')
    altloc = kwargs.get('altloc', 'A')
    header = kwargs.get('header', False)

    if model is not None:
        if isinstance(model, int):
            if model < 0:
                raise ValueError('model must be greater than 0')
        else:
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))
    title_suffix = ''

    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset

    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    if model != 0:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if not len(lines):
            raise ValueError('empty mmCIF file or stream')
        if header:
            ag, header = _parseMMCIFLines(ag, lines, model, chain, subset,
                                          altloc, header)
        else:
            ag = _parseMMCIFLines(ag, lines, model, chain, subset,
                                  altloc, header)
        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(
                              ag.numAtoms(),
                              ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
    if header:
        return ag, StarDict(*header,
                            title=str(kwargs.get('title', 'Unknown')))
    return ag
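
# --- usage sketch (illustrative; not part of the library) -----------------
# Parse C-alpha atoms of one chain from an mmCIF file; 'structure.cif' is a
# placeholder file name.
def _example_parseMMCIFStream():
    with open('structure.cif') as stream:
        ag = parseMMCIFStream(stream, chain='A', subset='ca',
                              title='example')
    if ag is not None:
        print(ag.numAtoms(), ag.numCoordsets())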
def _parseMMCIFLines(atomgroup, lines, model, chain, subset,
                     altloc_torf, header):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    :arg lines: mmCIF lines
    """
    if subset is not None:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset == 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    asize = 0
    i = 0
    models = []
    nModels = 0
    fields = dict()
    fieldCounter = -1
    foundAtomBlock = False
    doneAtomBlock = False
    start = 0
    stop = 0
    while not doneAtomBlock:
        line = lines[i]
        if line[:11] == '_atom_site.':
            fieldCounter += 1
            fields[line.split('.')[1].strip()] = fieldCounter

        if line.startswith('ATOM') or line.startswith('HETATM'):
            if not foundAtomBlock:
                foundAtomBlock = True
                start = i
            models.append(line.split()[fields['pdbx_PDB_model_num']])
            if models[asize] != models[asize - 1]:
                nModels += 1
            asize += 1
        else:
            if foundAtomBlock:
                doneAtomBlock = True
                stop = i
        i += 1

    if nModels == 0:
        nModels = 1

    if model is not None and model != 1:
        for i in range(start, stop):
            if str(models[i]) != model and str(models[i + 1]) == model:
                start = i + 1
            if str(models[i]) == model and str(models[i + 1]) != model:
                stop = i + 1
                break
        if not str(model) in models:
            raise mmCIFParseError('model {0} is not found'.format(model))

    addcoords = False
    if atomgroup.numCoordsets() > 0:
        addcoords = True

    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = '.' + ''.join(altloc_torf.split())
        else:
            which_altlocs = '.A'
        altloc_torf = False
    else:
        which_altlocs = '.A'
        altloc_torf = True

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms

    acount = 0
    for line in lines[start:stop]:
        startswith = line.split()[fields['group_PDB']]

        atomname = line.split()[fields['auth_atom_id']]
        resname = line.split()[fields['auth_comp_id']]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = line.split()[fields['auth_asym_id']]
        if chain is not None:
            if isinstance(chain, str):
                chain = chain.split(',')
            if not chID in chain:
                continue

        segID = line.split()[fields['label_asym_id']]

        alt = line.split()[fields['label_alt_id']]
        if alt not in which_altlocs:
            continue

        if model is not None:
            if int(models[acount]) < model:
                continue
            elif int(models[acount]) > model:
                break

        coordinates[acount] = [line.split()[fields['Cartn_x']],
                               line.split()[fields['Cartn_y']],
                               line.split()[fields['Cartn_z']]]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = line.split()[fields['auth_seq_id']]
        chainids[acount] = chID
        segnames[acount] = segID
        hetero[acount] = startswith == 'HETATM'  # True or False

        if chainids[acount] != chainids[acount - 1]:
            termini[acount - 1] = True

        altlocs[acount] = alt
        icodes[acount] = line.split()[fields['pdbx_PDB_ins_code']]
        if icodes[acount] == '?':
            icodes[acount] = ''

        serials[acount] = line.split()[fields['id']]
        elements[acount] = line.split()[fields['type_symbol']]
        bfactors[acount] = line.split()[fields['B_iso_or_equiv']]
        occupancies[acount] = line.split()[fields['occupancy']]

        acount += 1

    if model is not None:
        nModels = 1

    modelSize = acount // nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])

    atomgroup.setNames(atomnames[:modelSize])
    atomgroup.setResnames(resnames[:modelSize])
    atomgroup.setResnums(resnums[:modelSize])
    atomgroup.setSegnames(segnames[:modelSize])
    atomgroup.setChids(chainids[:modelSize])
    atomgroup.setFlags('hetatm', hetero[:modelSize])
    atomgroup.setFlags('pdbter', termini[:modelSize])
    atomgroup.setAltlocs(altlocs[:modelSize])
    atomgroup.setIcodes(icodes[:modelSize])
    atomgroup.setSerials(serials[:modelSize])
    atomgroup.setElements(elements[:modelSize])
    from caviar.prody_parser.utilities.misctools import getMasses
    atomgroup.setMasses(getMasses(elements[:modelSize]))
    atomgroup.setBetas(bfactors[:modelSize])
    atomgroup.setOccupancies(occupancies[:modelSize])

    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n * modelSize:(n + 1) * modelSize])

    if header:
        header = parseSTARLines(lines[:start - fieldCounter - 2] +
                                lines[stop:], shlex=True)
        return atomgroup, header

    return atomgroup