Example #1
    def __or__(self, other):

        if self is other:
            return self

        try:
            ag = other.getAtomGroup()
        except AttributeError:
            raise TypeError('other must be an AtomPointer')

        if self._ag != ag:
            raise ValueError('both selections must be from the same AtomGroup')

        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warn('Active coordinate set indices do not match; '
                        'the result will use index zero.')
            acsi = 0

        indices = unique(concatenate(
            (self._getIndices(), other._getIndices())))
        if indices[-1] == atommap.DUMMY:
            indices = indices[:-1]
        return Selection(self._ag,
                         indices,
                         '({0}) or ({1})'.format(self.getSelstr(),
                                                 other.getSelstr()),
                         acsi,
                         unique=True)
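
A minimal usage sketch (not part of the original source; `parsePDB`, `select`,
and `getSelstr` are assumed from the public ProDy API, and '1ubi' is an
illustrative identifier):

from prody import parsePDB

ag = parsePDB('1ubi')
water = ag.select('water')
calpha = ag.select('calpha')

merged = water | calpha      # __or__ above builds the union
print(merged.getSelstr())    # '(water) or (calpha)'
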
Example #2
    def _superpose(self, **kwargs):
        """Superpose conformations and update coordinates."""

        calcT = getTransformation
        if kwargs.get('trans', False):
            if self._trans is not None:
                LOGGER.info('Existing transformations will be overwritten.')
            trans = np.zeros((self._n_csets, 4, 4))
        else:
            trans = None
        indices = self._indices
        if indices is None:
            weights = self._weights
            coords = self._coords
            confs = self._confs
            confs_selected = self._confs
        else:
            weights = self._weights[:, indices]
            coords = self._coords[indices]
            confs = self._confs
            confs_selected = self._confs[:, indices]

        for i, conf in enumerate(confs_selected):
            rmat, tvec = calcT(conf, coords, weights[i])
            if trans is not None:
                trans[i][:3, :3] = rmat
                trans[i][:3, 3] = tvec
            confs[i] = tvec + np.dot(confs[i], rmat.T)
        self._trans = trans
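
The 4x4 matrices stored in `trans` pack the rotation into `[:3, :3]` and the
translation into `[:3, 3]`.  A self-contained NumPy sketch (illustrative
values) reproducing the `tvec + np.dot(confs[i], rmat.T)` update from one
such matrix:

import numpy as np

T = np.eye(4)
T[:3, :3] = np.array([[0., -1., 0.],
                      [1.,  0., 0.],
                      [0.,  0., 1.]])   # 90-degree rotation about z
T[:3, 3] = np.array([1., 2., 3.])       # translation vector

coords = np.random.rand(10, 3)
moved = T[:3, 3] + np.dot(coords, T[:3, :3].T)   # same form as the loop above
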
Example #3
def wwPDBServer(*key):
    """Set/get `wwPDB`_ FTP/HTTP server location used for downloading PDB
    structures.  Use one of the following keywords for setting a server:

    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+

    .. _wwPDB: http://www.wwpdb.org/"""

    if not key:
        return SETTINGS.get('wwpdb', None)
    elif len(key) == 1:
        try:
            key = key[0].lower()
        except AttributeError:
            raise TypeError('key must be a string')
        if key in WWPDB_FTP_SERVERS:
            SETTINGS['wwpdb'] = key
            SETTINGS.save()
            LOGGER.info('wwPDB server is set to {0}.'
                        .format(WWPDB_FTP_SERVERS[key][0]))
        else:
            raise ValueError('{0} is not a valid wwPDB server identifier'
                             .format(repr(key)))
    else:
        raise TypeError('one wwPDB server identifier is expected, {0} given'
                        .format(len(key)))
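
A usage sketch (assumes an interactive ProDy session where `wwPDBServer` is
importable; keys follow the table above):

from prody import wwPDBServer

wwPDBServer('PDBe')     # case insensitive: 'eu' or 'europe' also work
print(wwPDBServer())    # the stored key, e.g. 'pdbe'
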
Example #4
def alignCoordsets(atoms, weights=None):
    """Returns *atoms* after superposing coordinate sets onto its active
    coordinate set.  Transformations will be calculated for *atoms* and
    applied to its :class:`.AtomGroup`, when applicable.  Optionally,
    atomic *weights* can be passed for weighted superposition."""

    try:
        acsi, n_csets = atoms.getACSIndex(), atoms.numCoordsets()
    except AttributeError:
        raise TypeError('atoms must have type Atomic, not {0}'.format(
            type(atoms)))

    if n_csets < 2:
        LOGGER.warning('{0} contains fewer than two coordinate sets, '
                       'alignment was not performed.'.format(str(atoms)))
        return

    try:
        ag = atoms.getAtomGroup()
    except AttributeError:
        ag = atoms
    agacsi = ag.getACSIndex()

    tar = atoms._getCoords()
    for i in range(n_csets):
        if i == acsi:
            continue
        atoms.setACSIndex(i)
        ag.setACSIndex(i)
        calcTransformation(atoms, tar, weights).apply(ag)
    atoms.setACSIndex(acsi)
    ag.setACSIndex(agacsi)
    return atoms
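
A usage sketch, assuming `parsePDB` and `calcRMSD` from the ProDy API and a
multi-model entry such as the NMR structure 2k39:

from prody import parsePDB, alignCoordsets, calcRMSD

ag = parsePDB('2k39')          # AtomGroup with multiple coordinate sets
alignCoordsets(ag.calpha)      # superpose all models onto the active set
print(calcRMSD(ag.calpha))     # per-model RMSDs after alignment
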
Example #5
    def getCoordsets(self, indices=None):
        """Returns coordinate sets at given *indices*. *indices* may be an
        integer, a list of integers or **None**. **None** returns all
        coordinate sets."""

        if self._closed:
            raise ValueError('I/O operation on closed file')
        if (self._indices is None
                and (indices is None or indices == slice(None))):
            nfi = self._nfi
            self.reset()
            n_floats = self._n_floats + self._unitcell * 14
            n_atoms = self._n_atoms
            n_csets = self._n_csets
            data = self._file.read(self._itemsize * n_floats * n_csets)
            data = fromstring(data, self._dtype)
            if len(data) < n_floats * n_csets:
                # read() returns fewer bytes than requested for a truncated
                # or corrupt file; keep only whole frames
                n_csets = len(data) // n_floats
                data = data[:n_csets * n_floats]
                LOGGER.warning('DCD is corrupt, {0} out of {1} frames '
                               'were parsed.'.format(n_csets, self._n_csets))
            data = data.reshape((n_csets, n_floats))
            if self._unitcell:
                data = data[:, 14:]
            data = data.reshape((n_csets, 3, n_atoms + 2))
            data = data[:, :, 1:-1]
            data = data.transpose(0, 2, 1)
            self.goto(nfi)
            if self._astype is not None and self._astype != data.dtype:
                data = data.astype(self._astype)
            return data
        else:
            return TrajFile.getCoordsets(self, indices)
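
For orientation, the reshape above relies on the DCD frame layout: the x, y
and z blocks each hold `n_atoms` floats framed by two record markers, with a
14-float unit-cell block in front when present (the marker accounting here is
an assumption inferred from the code above):

n_atoms = 100
unitcell = True

n_floats_xyz = 3 * (n_atoms + 2)          # x/y/z blocks plus record markers
n_floats = n_floats_xyz + unitcell * 14   # optional unit-cell block
print(n_floats)                           # 320
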
Example #6
def loadAtoms(filename):
    """Returns :class:`.AtomGroup` instance loaded from *filename* using
    :func:`numpy.load` function.  See also :func:`saveAtoms`."""

    LOGGER.timeit('_prody_loadatoms')
    attr_dict = load(filename)
    files = set(attr_dict.files)

    if 'n_atoms' not in files:
        raise ValueError('{0} is not a valid atomic data file'.format(
            repr(filename)))
    title = str(attr_dict['title'])

    if 'coordinates' in files:
        coords = attr_dict['coordinates']
        ag = AtomGroup(title)
        ag._n_csets = int(attr_dict['n_csets'])
        ag._coords = coords
    else:
        ag = AtomGroup(title)
    ag._n_atoms = int(attr_dict['n_atoms'])
    ag._setTimeStamp()
    if 'flagsts' in files:
        ag._flagsts = int(attr_dict['flagsts'])

    if 'bonds' in files and 'bmap' in files and 'numbonds' in files:
        ag._bonds = attr_dict['bonds']
        ag._bmap = attr_dict['bmap']
        ag._data['numbonds'] = attr_dict['numbonds']

    skip_flags = set()

    for label, data in attr_dict.items():
        if label in SKIPLOAD:
            continue
        if data.ndim == 1 and data.dtype == bool:
            if label in skip_flags:
                continue
            else:
                ag._setFlags(label, data)
                skip_flags.update(flags.ALIASES.get(label, [label]))
        else:
            ag.setData(label, data)

    for label in ['segindex', 'chindex', 'resindex']:
        if label in attr_dict:
            ag._data[label] = attr_dict[label]

    if ag.numCoordsets() > 0:
        ag._acsi = 0

    if 'cslabels' in files:
        ag.setCSLabels(list(attr_dict['cslabels']))

    LOGGER.report('Atom group was loaded in %.2fs.', '_prody_loadatoms')
    return ag
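
A round-trip sketch, assuming the companion :func:`saveAtoms` writes an
:file:`.ag.npz` file and returns its name:

from prody import parsePDB, saveAtoms, loadAtoms

ag = parsePDB('1ubi')                   # illustrative input
filename = saveAtoms(ag)                # e.g. '1ubi.ag.npz'
ag2 = loadAtoms(filename)
print(ag2.numAtoms() == ag.numAtoms())  # True
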
Example #7
    def getDeviations(self):
        """Returns deviations from reference coordinates for selected atoms.
        Conformations can be aligned using one of :meth:`superpose` or
        :meth:`iterpose` methods prior to calculating deviations."""

        if not isinstance(self._confs, ndarray):
            LOGGER.warning('Conformations are not set.')
            return None
        if not isinstance(self._coords, ndarray):
            LOGGER.warning('Coordinates are not set.')
            return None

        return self._getCoordsets() - self._getCoords()
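
Deviations convert directly to per-conformation RMSDs; a standalone NumPy
sketch with a stand-in array shaped like the (n_confs, n_atoms, 3) output:

import numpy as np

devs = np.random.rand(5, 10, 3)         # stand-in for getDeviations() output
rmsds = np.sqrt((devs ** 2).sum(axis=2).mean(axis=1))
print(rmsds.shape)                      # (5,)
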
Example #8
    def superpose(self, **kwargs):
        """Superpose the ensemble onto the reference coordinates obtained by 
        :meth:`getCoords`.
        """

        trans = kwargs.pop('trans', True)
        if self._coords is None:
            raise ValueError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise ValueError('conformations are not set, use `addCoordset`')
        LOGGER.timeit('_prody_ensemble')
        self._superpose(trans=trans)  # trans kwarg is used by PDBEnsemble
        LOGGER.report('Superposition completed in %.2f seconds.',
                      '_prody_ensemble')
Example #9
    def __add__(self, other):
        """Concatenate ensembles. The reference coordinates, atoms, 
        and weights of *self* is used in the resulting ensemble."""

        if not isinstance(other, Ensemble):
            raise TypeError('an Ensemble instance cannot be added to an {0} '
                            'instance'.format(type(other)))
        elif self._n_atoms != other._n_atoms:
            raise ValueError('Ensembles must have same number of atoms.')

        ensemble = Ensemble('{0} + {1}'.format(self.getTitle(),
                                               other.getTitle()))
        if self._coords is not None:
            ensemble.setCoords(self._coords.copy())
        if self._confs is not None:
            ensemble.addCoordset(self._confs.copy())
        if other._confs is not None:
            ensemble.addCoordset(other._confs.copy())

        all_keys = list(self._data.keys()) + list(other._data.keys())
        for key in all_keys:
            if key in self._data and key in other._data:
                self_data = self._data[key]
                other_data = other._data[key]
            elif key in self._data:
                self_data = self._data[key]
                other_data = zeros(other.numConfs(), dtype=self_data.dtype)
            elif key in other._data:
                other_data = other._data[key]
                self_data = zeros(self.numConfs(), dtype=other_data.dtype)
            ensemble._data[key] = concatenate((self_data, other_data), axis=0)

        if self._weights is not None:
            LOGGER.info('Atom weights from {0} are used in {1}.'.format(
                repr(self._title), repr(ensemble.getTitle())))
            ensemble.setWeights(self._weights.copy())
        elif other._weights is not None:
            ensemble.setWeights(other._weights.copy())

        if self._atoms is not None:
            ensemble.setAtoms(self._atoms)
            ensemble._indices = self._indices
        else:
            ensemble.setAtoms(other._atoms)
            ensemble._indices = other._indices
        return ensemble
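
A construction sketch, assuming plain :class:`.Ensemble` objects built from
NumPy arrays (titles and shapes are illustrative):

import numpy as np
from prody import Ensemble

coords = np.random.rand(10, 3)

ens_a = Ensemble('a')
ens_a.setCoords(coords)
ens_a.addCoordset(np.random.rand(4, 10, 3))

ens_b = Ensemble('b')
ens_b.setCoords(coords)
ens_b.addCoordset(np.random.rand(3, 10, 3))

combined = ens_a + ens_b       # titled 'a + b'
print(combined.numConfs())     # 7
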
Example #10
    def superpose(self, **kwargs):
        """Superpose the ensemble onto the reference coordinates.
        
        :arg ref: index of the reference coordinate. If **None**, the average 
            coordinate will be assumed as the reference. Default is **None**
        :type ref: int
        """

        ref = kwargs.pop('ref', None)
        if self._coords is None:
            raise ValueError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise ValueError('conformations are not set, use `addCoordset`')
        LOGGER.timeit('_prody_ensemble')
        self._superpose(ref=ref)  # the ref kwarg is handled by _superpose
        LOGGER.report('Superposition completed in %.2f seconds.',
                      '_prody_ensemble')
Example #11
def alignAtomicsUsingEnsemble(atomics, ensemble):
    """Align a set of :class:`.Atomic` objects using transformations from *ensemble*, 
    which may be a :class:`.PDBEnsemble` or a :class:`.PDBConformation` instance. 
    
    Transformations will be applied based on indices so *atomics* and *ensemble* must 
    have the same number of members.

    :arg atomics: a set of :class:`.Atomic` objects to be aligned
    :type atomics: tuple, list, :class:`~numpy.ndarray`

    :arg ensemble: a :class:`.PDBEnsemble` or a :class:`.PDBConformation` from which 
                   transformations can be extracted
    :type ensemble: :class:`.PDBEnsemble`, :class:`.PDBConformation`
    """

    if not isListLike(atomics):
        raise TypeError('atomics must be list-like')

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')
    if isinstance(ensemble, PDBConformation):
        ensemble = [ensemble]

    if len(atomics) != len(ensemble):
        raise ValueError('atomics and ensemble must have the same length')

    output = []
    for i, conf in enumerate(ensemble):
        trans = conf.getTransformation()
        if trans is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')

        ag = atomics[i]
        if not isinstance(ag, Atomic):
            LOGGER.warning(
                'No atomic object found for conformation {0}.'.format(i))
            output.append(None)
            continue

        output.append(trans.apply(ag))

    if len(output) == 1:
        return output[0]
    else:
        return output
Example #12
def calcTransformation(mobile, target, weights=None):
    """Returns a :class:`Transformation` instance which, when applied to the
    atoms in *mobile*, minimizes the weighted RMSD between *mobile* and
    *target*.  *mobile* and *target* may be NumPy coordinate arrays, or
    :class:`.Atomic` instances, e.g. :class:`.AtomGroup`, :class:`.Chain`,
    or :class:`.Selection`."""

    if not isinstance(mobile, np.ndarray):
        try:
            mob = mobile._getCoords()
        except AttributeError:
            raise TypeError('mobile must be a numpy array or an object '
                            'with getCoords method')
    else:
        mob = mobile
    if not isinstance(target, np.ndarray):
        try:
            tar = target._getCoords()
        except AttributeError:
            raise TypeError('target must be a numpy array or an object '
                            'with getCoords method')
    else:
        tar = target

    if mob.shape != tar.shape:
        raise ValueError('reference and target coordinate arrays '
                         'must have same number of atoms')

    if mob.shape[1] != 3:
        raise ValueError('reference and target must be coordinate arrays')

    if weights is None:
        if isinstance(mobile, AtomMap):
            LOGGER.warn(
                'mobile is an AtomMap instance, consider assigning '
                'weights=mobile.getFlags("mapped") if there are dummy '
                'atoms in mobile')

        if isinstance(target, AtomMap):
            LOGGER.warn(
                'target is an AtomMap instance, consider assigning '
                'weights=target.getFlags("mapped") if there are dummy '
                'atoms in target')

    if weights is not None:
        weights = checkWeights(weights, mob.shape[0])

    return Transformation(*getTransformation(mob, tar, weights))
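
A self-checking sketch with synthetic coordinates: a known rotation and shift
are applied and then recovered (assumes :meth:`.Transformation.apply` accepts
a bare coordinate array):

import numpy as np
from prody import calcTransformation

tar = np.random.rand(20, 3)
rot = np.array([[0., -1., 0.],
                [1.,  0., 0.],
                [0.,  0., 1.]])
mob = np.dot(tar, rot.T) + np.array([1., 2., 3.])   # rotated, shifted copy

t = calcTransformation(mob, tar)
print(np.allclose(t.apply(mob.copy()), tar))        # True up to numerics
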
Example #13
def pathPDBFolder(folder=None, divided=False):
    """Returns or specify local PDB folder for storing PDB files downloaded from
    `wwPDB <http://www.wwpdb.org/>`_ servers.  Files stored in this folder can
    be accessed via :func:`.fetchPDB` from any working directory.  To release
    the current folder, pass an invalid path, e.g. ``folder=''``.

    If *divided* is **True**, the divided folder structure of wwPDB servers
    will be assumed when reading from and writing to the local folder.  For
    example, a structure with identifier **1XYZ** will be present as
    :file:`pdblocalfolder/yz/pdb1xyz.pdb.gz`.

    If *divided* is **False**, a plain folder structure will be expected and
    adopted when saving files.  For example, the same structure will be
    present as :file:`pdblocalfolder/1xyz.pdb.gz`.

    Finally, in either case, lower case letters will be used and compressed
    files will be stored."""

    if folder is None:
        folder = SETTINGS.get('pdb_local_folder')
        if folder:
            if isdir(folder):
                return folder, SETTINGS.get('pdb_local_divided', True)
            else:
                LOGGER.warn('PDB local folder {0} is not accessible.'.format(
                    repr(folder)))
    else:
        if isdir(folder):
            folder = abspath(folder)
            LOGGER.info('Local PDB folder is set: {0}'.format(repr(folder)))
            if divided:
                LOGGER.info('wwPDB divided folder structure will be assumed.')
            else:
                LOGGER.info('A plain folder structure will be assumed.')
            SETTINGS['pdb_local_folder'] = folder
            SETTINGS['pdb_local_divided'] = bool(divided)
            SETTINGS.save()
        else:
            current = SETTINGS.pop('pdb_local_folder')
            if current:
                LOGGER.info('PDB folder {0} is released.'.format(
                    repr(current)))
                SETTINGS.pop('pdb_local_divided')
                SETTINGS.save()
            else:
                raise IOError('{0} is not a valid path.'.format(repr(folder)))
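
A usage sketch (the path is illustrative and must exist):

from prody import pathPDBFolder

pathPDBFolder('/tmp/pdb')    # adopt a plain local folder
print(pathPDBFolder())       # ('/tmp/pdb', False)
pathPDBFolder('')            # release the folder again
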
Example #14
    def _iterDonors(self):
        """Yield pairs of indices for donored atoms that are within the pointer.
        Use :meth:`setDonors` for setting donors."""

        if self._ag._donors is None:
            LOGGER.warning('donors are not set, use `AtomGroup.setDonors`')

        indices = self._getIndices()
        iset = set(indices)
        if len(self._ag) / 2 >= len(self):
            for a, b in self._ag._iterDonors():
                if a in iset and b in iset:
                    yield a, b
        else:
            for a, dmap in zip(indices, self._ag._domap[indices]):
                for b in dmap:
                    if b > -1 and b in iset:
                        yield a, b
                iset.remove(a)
Example #15
    def _iterNBExclusions(self):
        """Yield pairs of indices for nbexclusioned atoms that are within the pointer.
        Use :meth:`setNBExclusions` for setting nbexclusions."""

        if self._ag._nbexclusions is None:
            LOGGER.warning(
                'nbexclusions are not set, use `AtomGroup.setNBExclusions`')

        indices = self._getIndices()
        iset = set(indices)
        if len(self._ag) / 2 >= len(self):
            for a, b in self._ag._iterNBExclusions():
                if a in iset and b in iset:
                    yield a, b
        else:
            for a, nbemap in zip(indices, self._ag._nbemap[indices]):
                for b in nbemap:
                    if b > -1 and b in iset:
                        yield a, b
                iset.remove(a)
Example #16
def checkIdentifiers(*pdb):
    """Check whether *pdb* identifiers are valid, and replace invalid ones
    with **None** in place."""

    identifiers = []
    append = identifiers.append
    for pid in pdb:
        try:
            pid = pid.strip().lower()
        except AttributeError:
            LOGGER.warn('{0} is not a valid identifier.'.format(repr(pid)))
            append(None)
        else:
            if not (len(pid) == 4 and pid.isalnum()):
                LOGGER.warn('{0} is not a valid identifier.'
                            .format(repr(pid)))
                append(None)
            else:
                append(pid)
    return identifiers
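
Calling the function defined above illustrates the normalization: identifiers
are stripped and lowercased, and anything that is not a four-character
alphanumeric string maps to **None**:

print(checkIdentifiers('1UBI', ' 2k39 ', 'xyz', 1234))
# ['1ubi', '2k39', None, None]
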
Example #17
def pathPDBMirror(path=None, format=None):
    """Returns or specify PDB mirror path to be used by :func:`.fetchPDB`.
    To release the current mirror, pass an invalid path, e.g. ``path=''``.
    If you are keeping a partial mirror, such as PDB files in
    :file:`/data/structures/divided/pdb/` folder, specify *format*, which is
    ``'pdb'`` in this case."""

    if path is None:
        path = SETTINGS.get('pdb_mirror_path')
        format = SETTINGS.get('pdb_mirror_format', None)
        if path:
            if isdir(path):
                if format is None:
                    return path
                else:
                    return path, format
            else:
                LOGGER.warning(
                    'PDB mirror path {0} is not accessible.'.format(
                        repr(path)))
    else:
        if isdir(path):
            path = abspath(path)
            LOGGER.info('Local PDB mirror path is set: {0}'.format(repr(path)))
            SETTINGS['pdb_mirror_path'] = path
            SETTINGS['pdb_mirror_format'] = format
            SETTINGS.save()
        else:
            current = SETTINGS.pop('pdb_mirror_path')
            if current:
                LOGGER.info('PDB mirror {0} is released.'.format(
                    repr(current)))
                SETTINGS.save()
            else:
                raise IOError('{0} is not a valid path.'.format(repr(path)))
Example #18
    def getCoordsets(self, indices=None):

        if self._closed:
            raise ValueError('I/O operation on closed file')
        if indices is None:
            indices = np.arange(self._n_csets)
        elif isinstance(indices, int):
            indices = np.array([indices])
        elif isinstance(indices, slice):
            indices = np.arange(*indices.indices(self._n_csets))
            indices.sort()
        elif isinstance(indices, (list, np.ndarray)):
            indices = np.unique(indices)
        else:
            raise TypeError('indices must be an integer or a list of integers')

        nfi = self._nfi
        self.reset()

        n_atoms = self.numSelected()
        coords = np.zeros((len(indices), n_atoms, 3), self._dtype)

        prev = -1  # index of the last frame read; after reset() position is 0
        next = self.nextCoordset
        for i, index in enumerate(indices):
            diff = index - prev
            if diff > 1:
                self.skip(diff-1)
            xyz = next()
            if xyz is None:
                LOGGER.warning('Expected {0} frames, but parsed {1}.'
                               .format(len(indices), i))
                self.goto(nfi)
                return coords[:i]
            coords[i] = xyz
            prev = index

        self.goto(nfi)
        return coords
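
A dry-run of the skip arithmetic: with `prev` starting at -1, the file
position lands on each requested frame before the read (pure bookkeeping,
no I/O):

indices = [0, 3, 4, 9]
prev, pos = -1, 0
for index in indices:
    diff = index - prev
    if diff > 1:
        pos += diff - 1     # skip(diff - 1)
    assert pos == index     # nextCoordset() now reads the requested frame
    pos += 1                # reading advances the position
    prev = index
print('positions line up')
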
Example #19
    def __add__(self, other):
        """Returnss an :class:`.AtomMap` instance. Order of pointed atoms are
        preserved."""

        try:
            ag = other.getAtomGroup()
        except AttributeError:
            raise TypeError('unsupported operand type(s) for +: {0} and '
                            '{1}'.format(repr(type(self).__name__),
                                         repr(type(other).__name__)))

        if ag != self._ag:
            raise ValueError('AtomPointer instances must point to the same '
                             'AtomGroup instance')
        acsi = self.getACSIndex()
        if acsi != other.getACSIndex():
            LOGGER.warning('Active coordset indices of atoms are not the same.'
                           ' Result will have ACSI {0}.'.format(acsi))

        title = '({0}) + ({1})'.format(str(self), str(other))
        indices = concatenate([self._getIndices(), other._getIndices()])

        dummies = 0
        try:
            dummies += self.numDummies()
        except AttributeError:
            pass
        try:
            dummies += other.numDummies()
        except AttributeError:
            pass

        return AtomMap(ag,
                       indices,
                       acsi,
                       title=title,
                       intarrays=True,
                       dummies=dummies)
Example #20
def addNonstdAminoacid(resname, *properties):
    """Add non-standard amino acid *resname* with *properties* selected from:

      * {props}

    .. ipython:: python

       addNonstdAminoacid('PTR', 'acidic', 'aromatic', 'cyclic', 'large',
       'polar', 'surface')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')"""

    resname = str(resname)
    if len(resname) > 4:
        LOGGER.warn('Residue name {0} is unusually long.'.format(
            repr(resname)))
    propset = set(properties)
    for cat, val in CATEGORIES.items():
        intersection = val.intersection(propset)
        if intersection:
            if len(intersection) > 1:
                raise ValueError('amino acid properties {0} cannot be '
                                 'present together'.format(', '.join(
                                     [repr(prp) for prp in intersection])))
            for prop in intersection:
                propset.remove(prop)
    if propset:
        raise ValueError('amino acid property {0} is not valid'.format(
            repr(propset.pop())))

    nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
    nonstd[resname] = set(properties)
    updateNonstandard(nonstd)
Example #21
    def iterpose(self, rmsd=0.0001):
        """Iteratively superpose the ensemble until convergence.  Initially,
        all conformations are aligned with the reference coordinates.  Then
        mean coordinates are calculated, and are set as the new reference
        coordinates.  This is repeated until reference coordinates do not
        change.  This is determined by the value of RMSD between the new and
        old reference coordinates.  Note that at the end of the iterative
        procedure the reference coordinate set will be average of conformations
        in the ensemble.

        :arg rmsd: change in reference coordinates to determine convergence,
            default is 0.0001 Å RMSD
        :type rmsd: float"""

        if self._coords is None:
            raise AttributeError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise AttributeError('conformations are not set, use '
                                 '`addCoordset`')
        LOGGER.info('Starting iterative superposition:')
        LOGGER.timeit('_prody_ensemble')
        rmsdif = 1
        step = 0
        weights = self._weights
        length = len(self)
        if weights is not None:
            if weights.ndim == 3:
                weightsum = weights.sum(axis=0)
                weightsum[weightsum == 0.] = 1.  # add pseudocount to avoid nan
            else:
                weightsum = length

        while rmsdif > rmsd:
            self._superpose()
            if weights is None:
                newxyz = self._confs.sum(0) / length
            else:
                newxyz = (self._confs * weights).sum(0) / weightsum
            rmsdif = getRMSD(self._coords, newxyz)
            self._coords = newxyz
            step += 1
            LOGGER.info('Step #{0}: RMSD difference = {1:.4e}'.format(
                step, rmsdif))
        LOGGER.report('Iterative superposition completed in %.2fs.',
                      '_prody_ensemble')
Example #22
def parseDCD(filename, start=None, stop=None, step=None, astype=None):
    """Parse CHARMM format DCD files (also NAMD 2.1 and later).  Returns an
    :class:`Ensemble` instance. Conformations in the ensemble will be ordered
    as they appear in the trajectory file.  Use the :class:`DCDFile` class for
    parsing coordinates of a subset of atoms.

    :arg filename: DCD filename
    :type filename: str

    :arg start: index of first frame to read
    :type start: int

    :arg stop: index of the frame that stops reading
    :type stop: int

    :arg step: steps between reading frames, default is 1 meaning every frame
    :type step: int

    :arg astype: cast coordinate array to specified type
    :type astype: type"""

    dcd = DCDFile(filename, astype=astype)
    time_ = time()
    n_frames = dcd.numFrames()
    LOGGER.info('DCD file contains {0} coordinate sets for {1} atoms.'.format(
        n_frames, dcd.numAtoms()))
    ensemble = dcd[slice(start, stop, step)]
    dcd.close()
    time_ = time() - time_ or 0.01
    dcd_size = 1.0 * dcd.numFrames() * dcd._bytes_per_frame / (1024 * 1024)
    LOGGER.info('DCD file was parsed in {0:.2f} seconds.'.format(time_))
    LOGGER.info('{0:.2f} MB parsed at input rate {1:.2f} MB/s.'.format(
        dcd_size, dcd_size / time_))
    LOGGER.info('{0} coordinate sets parsed at input rate {1} frame/s.'.format(
        n_frames, int(n_frames / time_)))
    return ensemble
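
A usage sketch (the filename is illustrative):

from prody import parseDCD

ens = parseDCD('traj.dcd', start=0, stop=100, step=2)   # frames 0, 2, ..., 98
print(ens.numConfs(), ens.numAtoms())
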
Example #23
def calcMSF(coordsets):
    """Calculate mean square fluctuation(s) (MSF)."""

    try:
        ncsets = coordsets.numFrames()
    except AttributeError:
        try:
            coordsets = coordsets.getCoordsets()
        except AttributeError:
            pass
        try:
            ndim, shape = coordsets.ndim, coordsets.shape
        except:
            raise TypeError('coordsets must be a Numpy array or a ProDy '
                            'object with `getCoordsets` method')
        if ndim != 3 or shape[0] == 1:
            raise ValueError('coordsets must contain multiple sets')
        msf = var(coordsets, 0).sum(1)
    else:
        nfi = coordsets.nextIndex()
        natoms = coordsets.numSelected()
        total = zeros((natoms, 3))
        sqsum = zeros((natoms, 3))

        LOGGER.progress(
            'Evaluating {0} frames from {1}:'.format(ncsets, str(coordsets)),
            ncsets, '_prody_calcMSF')
        ncsets = 0
        coordsets.reset()
        for frame in coordsets:
            frame.superpose()
            coords = frame._getCoords()
            total += coords
            sqsum += coords**2
            ncsets += 1
            LOGGER.update(ncsets, label='_prody_calcMSF')
        LOGGER.finish()
        msf = (sqsum / ncsets - (total / ncsets)**2).sum(1)
        coordsets.goto(nfi)
    return msf
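
For an in-memory coordinate array, the streaming E[x^2] - E[x]^2 accumulation
above reduces to a one-liner; a NumPy sketch with a stand-in array:

import numpy as np

coordsets = np.random.rand(50, 10, 3)       # (frames, atoms, xyz)
msf = coordsets.var(axis=0).sum(axis=1)     # same as var(coordsets, 0).sum(1)
print(msf.shape)                            # (10,)
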
Example #24
    def run(self, tmax=200, li=0.2, lf=0.01, ei=0.3,
            ef=0.05, Ti=0.1, Tf=2, c=0, calcC=False):
        LOGGER.info('Building coordinates from electron density map. '
                    'This may take a while.')
        LOGGER.timeit('_prody_make_nodes')
        tmax = int(tmax * self.N)
        li = li * self.N
        if calcC:
            Ti = Ti * self.N
            Tf = Tf * self.N
        for t in range(1, tmax + 1):
            # calc the parameters
            tt = float(t) / tmax
            l = li * np.power(lf / li, tt)
            ep = ei * np.power(ef / ei, tt)
            if calcC:
                T = Ti * np.power(Tf / Ti, tt)
            else:
                T = -1
            self.runOnce(t, l, ep, T, c)
        LOGGER.report('{0} pseudoatoms were fitted in %.2fs.'.format(
            self.N), '_prody_make_nodes')
        return
Example #25
def fetchPDBviaHTTP(*pdb, **kwargs):
    """Retrieve PDB file(s) for specified *pdb* identifier(s) and return
    path(s).  Downloaded files will be stored in local PDB folder, if one
    is set using :meth:`.pathPDBFolder`, and copied into *folder*, if
    specified by the user.  If no destination folder is specified, files
    will be saved in the current working directory.  If *compressed* is
    **False**, decompressed files will be copied into *folder*."""

    if kwargs.get('check', True):
        identifiers = checkIdentifiers(*pdb)
    else:
        identifiers = list(pdb)

    output_folder = kwargs.pop('folder', None)
    compressed = bool(kwargs.pop('compressed', True))

    extension = '.pdb'
    local_folder = pathPDBFolder()
    if local_folder:
        local_folder, is_divided = local_folder
        if is_divided:
            getPath = lambda pdb: join(makePath(join(local_folder, pdb[1:3])),
                                       'pdb' + pdb + '.pdb.gz')
        else:
            getPath = lambda pdb: join(local_folder, pdb + '.pdb.gz')
        if output_folder is None:
            second = lambda filename, pdb: filename
        else:
            if compressed:
                second = lambda filename, pdb: (copyFile(filename,
                            join(output_folder, pdb + extension + '.gz')))
            else:
                second = lambda filename, pdb: gunzip(filename,
                            join(output_folder, pdb + extension))

    else:
        if output_folder is None:
            output_folder = getcwd()
        if compressed:
            getPath = lambda pdb: join(output_folder, pdb + extension + '.gz')
            second = lambda filename, pdb: filename
        else:
            getPath = lambda pdb: join(output_folder, pdb + extension)
            second = lambda filename, pdb: gunzip(getPath(pdb), getPath(pdb))


    getURL = WWPDB_HTTP_URL[wwPDBServer() or 'us']

    success = 0
    failure = 0
    filenames = []
    for pdb in identifiers:
        if pdb is None:
            filenames.append(None)
            continue
        try:
            handle = openURL(getURL(pdb))
        except Exception as err:
            LOGGER.warn('{0} download failed ({1}).'.format(pdb, str(err)))
            failure += 1
            filenames.append(None)
        else:
            data = handle.read()
            if len(data):
                filename = getPath(pdb)

                with open(filename, 'w+b') as pdbfile:
                    pdbfile.write(data)

                filename = normpath(relpath(second(filename, pdb)))
                LOGGER.debug('{0} downloaded ({1})'
                             .format(pdb, sympath(filename)))
                success += 1
                filenames.append(filename)
            else:
                LOGGER.warn('{0} download failed, reason unknown.'
                            .format(pdb))
                failure += 1
                filenames.append(None)
    LOGGER.debug('PDB download via HTTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
    if len(identifiers) == 1:
        return filenames[0]
    else:
        return filenames
Example #26
def fetchPDBviaFTP(*pdb, **kwargs):
    """Retrieve PDB (default), PDBML, mmCIF, or EMD file(s) for specified *pdb*
    identifier(s) and return path(s).  Downloaded files will be stored in
    local PDB folder, if one is set using :meth:`.pathPDBFolder`, and copied
    into *folder*, if specified by the user.  If no destination folder is
    specified, files will be saved in the current working directory.  If
    *compressed* is **False**, decompressed files will be copied into
    *folder*.  The *format* keyword argument can be used to retrieve
    `PDBML <http://pdbml.pdb.org/>`_, `mmCIF <http://mmcif.pdb.org/>`_
    and `EMD <ftp://ftp.wwpdb.org/pub/emdb/doc/Map-format/current/EMDB_map_format.pdf>`_
    files: ``format='cif'`` will fetch an mmCIF file, ``format='emd'`` will fetch an EMD file,
    and ``format='xml'`` will fetch a PDBML file.
    If a PDBML header file is desired, ``noatom=True`` will do the job."""

    if kwargs.get('check', True):
        identifiers = checkIdentifiers(*pdb)
    else:
        identifiers = list(pdb)

    output_folder = kwargs.pop('folder', None)
    compressed = bool(kwargs.pop('compressed', True))
    format = str(kwargs.pop('format', 'pdb')).lower()
    noatom = bool(kwargs.pop('noatom', False))

    if format == 'pdb':
        ftp_divided = 'pdb/data/structures/divided/pdb'
        ftp_pdbext = '.ent.gz'
        ftp_prefix = 'pdb'
        extension = '.pdb'
    elif format == 'xml':
        if noatom:
            ftp_divided = 'pdb/data/structures/divided/XML-noatom'
            ftp_pdbext = '-noatom.xml.gz'
            extension = '-noatom.xml'
        else:
            ftp_divided = 'pdb/data/structures/divided/XML'
            ftp_pdbext = '.xml.gz'
            extension = '.xml'
        ftp_prefix = ''
    elif format == 'cif':
        ftp_divided = 'pdb/data/structures/divided/mmCIF'
        ftp_pdbext = '.cif.gz'
        ftp_prefix = ''
        extension = '.cif'
    elif format == 'emd' or format == 'map':
        ftp_divided = 'emdb/structures'
        ftp_pdbext = '.map.gz'
        ftp_prefix = 'emd_'
        extension = '.map'
    else:
        raise ValueError(repr(format) + ' is not a valid format')

    local_folder = pathPDBFolder()

    if format == 'pdb' and local_folder:
        local_folder, is_divided = local_folder
        if is_divided:
            getPath = lambda pdb: join(makePath(join(local_folder, pdb[1:3])),
                                       'pdb' + pdb + '.pdb.gz')
        else:
            getPath = lambda pdb: join(local_folder, pdb + '.pdb.gz')
        if output_folder is None:
            second = lambda filename, pdb: filename
        else:
            if compressed:
                second = lambda filename, pdb: (copyFile(filename,
                            join(output_folder, pdb + extension + '.gz')))
            else:
                second = lambda filename, pdb: gunzip(filename,
                            join(output_folder, pdb + extension))

    else:
        if output_folder is None:
            output_folder = getcwd()
        if compressed:
            getPath = lambda pdb: join(output_folder, pdb + extension + '.gz')
            second = lambda filename, pdb: filename
        else:
            getPath = lambda pdb: join(output_folder, pdb + extension)
            second = lambda filename, pdb: gunzip(getPath(pdb), getPath(pdb))


    ftp_name, ftp_host, ftp_path = WWPDB_FTP_SERVERS[wwPDBServer() or 'us']
    LOGGER.debug('Connecting wwPDB FTP server {0}.'.format(ftp_name))

    from ftplib import FTP
    try:
        ftp = FTP(ftp_host)
    except Exception as error:
        raise type(error)('FTP connection problem, potential reason: '
                          'no internet connectivity')
    else:
        success = 0
        failure = 0
        filenames = []
        ftp.login('')
        for pdb in identifiers:
            if pdb is None:
                filenames.append(None)
                continue
            data = []
            ftp_fn = ftp_prefix + pdb + ftp_pdbext
            try:
                ftp.cwd(ftp_path)
                ftp.cwd(ftp_divided)
                if format in ('emd', 'map'):
                    ftp.cwd('EMD-{0}/map'.format(pdb))
                else:
                    ftp.cwd(pdb[1:3])
                ftp.retrbinary('RETR ' + ftp_fn, data.append)
            except Exception as error:
                if ftp_fn in ftp.nlst():
                    LOGGER.warn('{0} download failed ({1}). It is '
                                'possible that you do not have rights to '
                                'download .gz files in the current network.'
                                .format(pdb, str(error)))
                else:
                    LOGGER.info('{0} download failed. {1} does not exist '
                                'on {2}.'.format(ftp_fn, pdb, ftp_host))
                failure += 1
                filenames.append(None)
            else:
                if len(data):
                    filename = getPath(pdb)

                    with open(filename, 'w+b') as pdbfile:
                        for block in data:
                            pdbfile.write(block)

                    filename = normpath(relpath(second(filename, pdb)))
                    LOGGER.debug('{0} downloaded ({1})'
                                 .format(pdb, sympath(filename)))
                    success += 1
                    filenames.append(filename)
                else:
                    LOGGER.warn('{0} download failed, reason unknown.'
                                .format(pdb))
                    failure += 1
                    filenames.append(None)

        ftp.quit()

    LOGGER.debug('PDB download via FTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
    if len(identifiers) == 1:
        return filenames[0]
    else:
        return filenames
Example #27
def parseImagesFromSTAR(particlesSTAR, **kwargs):
    """
    Parses particle images using data from a STAR file 
    containing information about them.

    :arg particlesSTAR: a filename for a STAR file.
    :type particlesSTAR: str

    :arg block_indices: indices for data blocks containing rows
        corresponding to images of interest.
        The indexing scheme is similar to that for numpy arrays.
        Default behavior is to use all data blocks about images.
    :type block_indices: list, :class:`~numpy.ndarray`

    :arg row_indices: indices for rows corresponding to images of interest.
        The indexing scheme is similar to that for numpy arrays. 
        row_indices should be a 1D or 2D array-like.
        2D row_indices should contain an entry for each relevant loop. 
        If a 1D array-like is given the same row indices 
        will be applied to all loops.
        Default behavior is to use all rows about images
    :type row_indices: list, :class:`~numpy.ndarray`

    :arg particle_indices: indices for particles regardless of STAR structure
        default is to take all particles.
        Please note: this acts after block_indices and row_indices
    :type particle_indices: list, :class:`~numpy.ndarray`

    :arg saveImageArrays: whether to save the numpy array for each image to file
        default is False
    :type saveImageArrays: bool

    :arg saveDirectory: directory where numpy image arrays are saved
        default is None, which means save to the current working directory
    :type saveDirectory: str, None

    :arg rotateImages: whether to apply in plane translations and rotations using 
        provided psi and origin data, default is True
    :type rotateImages: bool 
    
    """

    try:
        from skimage.transform import rotate
    except ImportError:
        raise ImportError('This function requires scikit-image.')

    block_indices = kwargs.get('block_indices', None)
    # No loop_indices because data blocks about particle images contain 1 loop
    row_indices = kwargs.get('row_indices', None)
    particle_indices = kwargs.get('particle_indices', None)

    saveImageArrays = kwargs.get('saveImageArrays', False)
    saveDirectory = kwargs.get('saveDirectory', None)
    rotateImages = kwargs.get('rotateImages', True)

    try:
        particlesSTAR = parseSTAR(particlesSTAR)
    except:
        raise ValueError('particlesSTAR should be a filename for a STAR file')

    # Check dimensions/contents of particlesSTAR and generate full indices
    dataBlocks = []
    loops = []
    maxLoops = 0
    maxRows = 0
    dataBlock_goodness = []
    for dataBlock in particlesSTAR:

        foundImageField = False
        for loop in dataBlock:
            if ('_image' in loop.fields) or ('_rlnImageName' in loop.fields):
                foundImageField = True
                loops.append(loop)
                if loop.numRows > maxRows:
                    maxRows = loop.numRows
            else:
                dataBlock.pop(int(loop.getTitle().split(' ')[-1]))

        if dataBlock.numLoops > maxLoops:
            maxLoops = dataBlock.numLoops

        if foundImageField:
            dataBlocks.append(dataBlock)
            dataBlock_goodness.append(True)
        else:
            dataBlock_goodness.append(False)

    indices = np.zeros((len(dataBlocks), maxLoops, maxRows, 3), dtype=int)
    i = -1
    for n, dataBlock in enumerate(particlesSTAR):
        if dataBlock_goodness[n]:
            i += 1
            for j, loop in enumerate(dataBlock):
                for k in range(maxRows):
                    if k < loop.numRows:
                        indices[i, j, k] = np.array([n, j, k])
                    else:
                        indices[i, j, k] = np.array([0, 0, 0])

    dataBlocks = np.array(dataBlocks)
    loops = np.array(loops)

    # Convert keyword indices to valid indices if possible
    if block_indices is not None:
        if np.array_equal(dataBlocks, np.array([])):
            raise TypeError(
                'particlesSTAR must have data blocks to use block_indices')

        try:
            block_indices = np.array(block_indices)
        except:
            raise TypeError('block_indices should be array-like')

        if block_indices.ndim != 1:
            raise ValueError(
                'block_indices should be a 1-dimensional array-like')

        for i, index in enumerate(list(reversed(block_indices))):
            try:
                block = particlesSTAR[index]
                if not isinstance(block, StarDataBlock):
                    LOGGER.warn(
                        'There is no block corresponding to block_index {0}. '
                        'This index has been removed.'.format(
                            block_indices.shape[0] - i - 1))
                    block_indices = np.delete(block_indices, i, 0)
            except:
                LOGGER.warn(
                    'There is no block corresponding to block_index {0}. '
                    'This index has been removed.'.format(
                        block_indices.shape[0] - i - 1))
                block_indices = np.delete(block_indices, i, 0)

        if not np.array_equal(block_indices, np.array([])):
            indices = np.concatenate(([
                indices[np.where(indices[:, 0, 0, 0] == item)]
                for item in block_indices
            ]),
                                     axis=0)
        else:
            LOGGER.warn(
                'None of the block_indices corresponded to dataBlocks. '
                'Default block indices corresponding to all dataBlocks '
                'will be used instead.')

        # only re-slice when explicit block indices were given
        dataBlocks = particlesSTAR[block_indices]

    if row_indices is not None:
        try:
            row_indices = np.array(row_indices)
        except:
            raise TypeError('row_indices should be array-like')

        if row_indices.ndim == 1:
            if isinstance(row_indices[0], int):
                # row_indices provided was truly 1D so
                # we will use same row indices for all data blocks
                # and warn the user we are doing so
                if len(dataBlocks) != 1:
                    LOGGER.warn(
                        'row_indices is 1D but there are multiple data blocks '
                        'so the same row indices will be used for each')

                row_indices = np.array(
                    [row_indices for i in range(len(dataBlocks))])
                # This also works if len(dataBlocks) == 1

            elif isinstance(row_indices[0], (list, tuple)):
                # A list-like of list-likes of different sizes was provided
                # We turn it into a proper 2D array by filling the short
                # list likes with zeros

                if len(row_indices) != len(dataBlocks):
                    raise ValueError(
                        'There should be an entry in row indices for '
                        'each data block')

                max_len = 0
                for entry in row_indices:
                    if not np.isscalar(entry):
                        if len(entry) > max_len:
                            max_len = len(entry)

                row_indices_list_entries = []
                for entry in row_indices:
                    if isinstance(entry, int):
                        list_entry = [entry]
                    else:
                        list_entry = list(entry)

                    while len(list_entry) < max_len:
                        list_entry.append(0)

                    row_indices_list_entries.append(list_entry)

                row_indices = np.array(row_indices_list_entries)

        elif row_indices.ndim == 2:
            # A list-like of list-likes of the same size was provided
            if row_indices.shape[0] != len(dataBlocks):
                if len(row_indices) == 1:
                    # we will use same row indices for all data blocks
                    # and warn the user we are doing so
                    if len(dataBlocks) != 1:
                        LOGGER.warn(
                            'row_indices has one entry but there are multiple data blocks '
                            'so the same row indices will be used for each')

                    row_indices = np.array(
                        [row_indices[0] for i in range(len(dataBlocks))])
                    # This also works if len(dataBlocks) == 1
                else:
                    raise ValueError(
                        'There should be an entry in row indices for '
                        'each data block')

        else:
            raise ValueError(
                'row_indices should be 1D or 2D array-like objects')

        # indices need updating
        good_indices_list = []
        for i, index_i in enumerate(indices):
            good_indices_list.append([])
            for j, index_j in enumerate(index_i):
                good_indices_list[i].append([])
                for r, index_r in enumerate(row_indices[i]):
                    for k, index_k in enumerate(index_j):
                        if k == index_r:
                            if not (r != 0 and index_r == 0):
                                good_indices_list[i][j].append(index_k)
                            else:
                                good_indices_list[i][j].append(
                                    np.array([0, 0, 0]))

        indices = np.array(good_indices_list)

    if indices.size == 0:
        raise ValueError(
            'selection does not contain any rows with image fields')

    # Use indices to collect particle data dictionaries
    particles = []

    for i, index_i in enumerate(indices):
        for j, index_j in enumerate(index_i):
            for k, index_k in enumerate(index_j):
                if not (np.array_equal(index_k, np.array([0, 0, 0]))
                        and not (i == 0 and j == 0 and k == 0)):
                    particles.append(
                        particlesSTAR[index_k[0]][index_k[1]][index_k[2]])

    if particle_indices is None:
        particle_indices = list(range(len(particles)))

    # Parse images using particle dictionaries
    image_stacks = dict()
    images = []
    parsed_images_data = []
    stk_images = []
    if particlesSTAR._prog == 'XMIPP':
        imageFieldKey = '_image'
    else:
        imageFieldKey = '_rlnImageName'

    for i in particle_indices:
        particle = particles[i]

        try:
            image_field = particle[imageFieldKey]
            image_index = int(image_field.split('@')[0]) - 1
            filename = image_field.split('@')[1]
        except:
            raise ValueError(
                'particlesSTAR does not contain data about particle image '
                '{0} location in either RELION or XMIPP format'.format(i))

        if filename.endswith('.stk'):
            stk_images.append(str(i))
            continue

        if filename not in image_stacks:
            image_stacks[filename] = parseEMD(filename).density

        image = image_stacks[filename][image_index]
        parsed_images_data.append(image_field)

        if saveImageArrays:
            if saveDirectory is not None:
                np.save('{0}/{1}'.format(saveDirectory, i), image)
            else:
                np.save('{0}'.format(i), image)

        if rotateImages:
            if particlesSTAR._prog == 'RELION':
                anglePsi = float(particle['_rlnAnglePsi'])
                originX = float(particle['_rlnOriginX'])
                originY = float(particle['_rlnOriginY'])
            elif particlesSTAR._prog == 'XMIPP':
                anglePsi = float(particle['_anglePsi'])
                originX = float(particle['_shiftX'])
                originY = float(particle['_shiftY'])
            images.append(
                rotate(image,
                       anglePsi,
                       center=(float(image.shape[0]) - originX,
                               float(image.shape[1]) - originY)))
        else:
            images.append(image)

    if len(stk_images) > 0:
        LOGGER.warn(
            'ProDy currently cannot parse images from XMIPP .stk files. '
            'Please be aware that images {0} and {1} will be missing '
            'from the final array.'.format(', '.join(stk_images[:-1]),
                                           stk_images[-1]))

    return np.array(images), parsed_images_data
Example #28
    def _superpose(self, **kwargs):
        """Superpose conformations and update coordinates."""

        ref = kwargs.pop('ref', None)

        indices = self._indices
        weights = self._weights
        mobs = self._confs
        if indices is None:
            idx = False
            tar = self._coords
            movs = None
        else:
            idx = True
            if self._weights is not None:
                weights = weights[indices]
            tar = self._coords[indices]
            movs = self._confs

        linalg = importLA()
        svd = linalg.svd
        det = linalg.det

        if weights is None:
            if ref is None:
                tar_com = tar.mean(0)
            else:
                tar_com = tar[ref]

            tar_org = (tar - tar_com)
            mob_org = zeros(tar_org.shape, dtype=mobs.dtype)
            tar_org = tar_org.T
        else:
            weights_sum = weights.sum()
            weights_dot = dot(weights.T, weights)
            if ref is None:
                tar_com = (tar * weights).sum(axis=0) / weights_sum
            else:
                tar_com = (tar[ref] * weights[ref]).sum(axis=0) / sum(
                    weights[ref])
            tar_org = (tar - tar_com)
            mob_org = zeros(tar_org.shape, dtype=mobs.dtype)

        LOGGER.progress('Superposing ', len(mobs), '_prody_ensemble')
        for i, mob in enumerate(mobs):
            if idx:
                mob = mob[indices]
            if weights is None:
                mob_com = mob.mean(0)
                matrix = dot(tar_org, subtract(mob, mob_com, mob_org))
            else:
                mob_com = (mob * weights).sum(axis=0) / weights_sum
                subtract(mob, mob_com, mob_org)
                matrix = dot((tar_org * weights).T,
                             (mob_org * weights)) / weights_dot

            U, s, Vh = svd(matrix)
            Id = array([[1, 0, 0], [0, 1, 0], [0, 0, sign(det(matrix))]])
            rotation = dot(Vh.T, dot(Id, U.T))

            if movs is None:
                mobs[i] = dot(mob_org, rotation)
                add(mobs[i], tar_com, mobs[i])
            else:
                add(dot(movs[i], rotation), (tar_com - dot(mob_com, rotation)),
                    movs[i])
            LOGGER.update(i + 1, label='_prody_ensemble')
        LOGGER.finish()
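
The inner loop is the Kabsch algorithm; a standalone, unweighted sketch of
the same SVD step (the helper name and self-test are illustrative):

import numpy as np

def kabsch(mob, tar):
    """Rotation and translation superposing mob onto tar (row vectors)."""
    mob_com, tar_com = mob.mean(0), tar.mean(0)
    matrix = np.dot((tar - tar_com).T, mob - mob_com)
    U, s, Vh = np.linalg.svd(matrix)
    Id = np.diag([1., 1., np.sign(np.linalg.det(matrix))])
    rotation = np.dot(Vh.T, np.dot(Id, U.T))
    return rotation, tar_com - np.dot(mob_com, rotation)

tar = np.random.rand(8, 3)
rot, tvec = kabsch(tar + 1.0, tar)                  # pure-translation case
print(np.allclose(np.dot(tar + 1.0, rot) + tvec, tar))
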
Example #29
def parseMMCIFStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` and/or a class:`.StarDict` 
    containing header data parsed from a stream of CIF lines.
    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)"""

    model = kwargs.get('model')
    subset = kwargs.get('subset')
    chain = kwargs.get('chain')
    altloc = kwargs.get('altloc', 'A')
    header = kwargs.get('header', False)

    if model is not None:
        if isinstance(model, int):
            if model < 0:
                raise ValueError('model must be greater than 0')
        else:
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))
    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    if model != 0:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if not len(lines):
            raise ValueError('empty mmCIF file or stream')

        if header:
            ag, header = _parseMMCIFLines(ag, lines, model, chain, subset,
                                          altloc, header)
        else:
            ag = _parseMMCIFLines(ag, lines, model, chain, subset,
                                  altloc, header)

        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(ag.numAtoms(),
                                                    ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
        if header:
            return ag, StarDict(*header, title=str(kwargs.get('title', 'Unknown')))
        return ag
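A minimal usage sketch, assuming a hypothetical local file 1ubi.cif; any object exposing ``readlines`` or ``read`` works as the stream:

# parse chain A of the first model from a hypothetical local file
with open('1ubi.cif') as stream:
    ag = parseMMCIFStream(stream, chain='A', model=1)
    print(ag.numAtoms())

# header=True additionally returns a StarDict with the non-atom data
with open('1ubi.cif') as stream:
    ag, header = parseMMCIFStream(stream, header=True)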
Example #30
def _parseMMCIFLines(atomgroup, lines, model, chain, subset,
                     altloc_torf, header):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    :arg lines: mmCIF lines
    """

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset == 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    asize = 0
    i = 0
    models = []
    nModels = 0
    fields = dict()
    fieldCounter = -1
    foundAtomBlock = False
    doneAtomBlock = False
    start = 0
    stop = 0
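    # one pass: map _atom_site fields to columns and locate the atom block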
    while not doneAtomBlock:
        line = lines[i]
        if line[:11] == '_atom_site.':
            fieldCounter += 1
            fields[line.split('.')[1].strip()] = fieldCounter

        if line.startswith('ATOM') or line.startswith('HETATM'):
            if not foundAtomBlock:
                foundAtomBlock = True
                start = i
            models.append(line.split()[fields['pdbx_PDB_model_num']])
            if len(models) == 1 or models[asize] != models[asize-1]:
                nModels += 1
            asize += 1
        else:
            if foundAtomBlock:
                doneAtomBlock = True
                stop = i
        i += 1
    if nModels == 0:
        nModels = 1

    if model is not None and model != 1:
        smodel = str(model)
        if smodel not in models:
            raise mmCIFParseError('model {0} is not found'.format(model))
        # models[i - offset] is the model number of the atom line at index i
        offset = start
        for i in range(start, stop - 1):
            if models[i-offset] != smodel and models[i-offset+1] == smodel:
                start = i + 1
            if models[i-offset] == smodel and models[i-offset+1] != smodel:
                stop = i + 1
                break
        # keep models aligned with lines[start:stop] for the parsing loop
        models = models[start-offset:stop-offset]

    addcoords = False
    if atomgroup.numCoordsets() > 0:
        addcoords = True

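    # mmCIF marks atoms without an altloc with '.', so always accept it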
    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = '.' + ''.join(altloc_torf.split())
        else:
            which_altlocs = '.A'
        altloc_torf = False
    else:
        which_altlocs = '.A'
        altloc_torf = True

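    # pre-allocate per-atom attribute arrays for the whole atom block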
    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms

    acount = 0
    for lcount, line in enumerate(lines[start:stop]):
        values = line.split()
        startswith = values[fields['group_PDB']]

        atomname = values[fields['auth_atom_id']]
        resname = values[fields['auth_comp_id']]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = values[fields['auth_asym_id']]
        if chain is not None:
            if isinstance(chain, str):
                chain = chain.split(',')
            if chID not in chain:
                continue

        segID = values[fields['label_asym_id']]

        alt = values[fields['label_alt_id']]
        if alt not in which_altlocs:
            continue

        if model is not None:
            if int(models[lcount]) < model:
                continue
            elif int(models[lcount]) > model:
                break

        coordinates[acount] = [values[fields['Cartn_x']],
                               values[fields['Cartn_y']],
                               values[fields['Cartn_z']]]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = values[fields['auth_seq_id']]
        chainids[acount] = chID
        segnames[acount] = segID
        hetero[acount] = startswith == 'HETATM'

        # mark a chain terminus whenever the chain ID changes
        if chainids[acount] != chainids[acount-1]:
            termini[acount-1] = True

        altlocs[acount] = alt
        icodes[acount] = values[fields['pdbx_PDB_ins_code']]

        # mmCIF uses '?' for a missing insertion code
        if icodes[acount] == '?':
            icodes[acount] = ''

        serials[acount] = values[fields['id']]
        elements[acount] = values[fields['type_symbol']]
        bfactors[acount] = values[fields['B_iso_or_equiv']]
        occupancies[acount] = values[fields['occupancy']]

        acount += 1

    if model is not None:
        nModels = 1

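    # atoms per model; each coordinate set is one equal-length slice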
    modelSize = acount//nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])

    atomgroup.setNames(atomnames[:modelSize])
    atomgroup.setResnames(resnames[:modelSize])
    atomgroup.setResnums(resnums[:modelSize])
    atomgroup.setSegnames(segnames[:modelSize])
    atomgroup.setChids(chainids[:modelSize])
    atomgroup.setFlags('hetatm', hetero[:modelSize])
    atomgroup.setFlags('pdbter', termini[:modelSize])
    atomgroup.setAltlocs(altlocs[:modelSize])
    atomgroup.setIcodes(icodes[:modelSize])
    atomgroup.setSerials(serials[:modelSize])

    atomgroup.setElements(elements[:modelSize])
    from caviar.prody_parser.utilities.misctools import getMasses
    atomgroup.setMasses(getMasses(elements[:modelSize]))
    atomgroup.setBetas(bfactors[:modelSize])
    atomgroup.setOccupancies(occupancies[:modelSize])

    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n*modelSize:(n+1)*modelSize])

    if header:
        header = parseSTARLines(lines[:start-fieldCounter-2] + lines[stop:],
                                shlex=True)
        return atomgroup, header

    return atomgroup
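The parser above depends on the _atom_site. loop header declaring one field per line, in column order. Below is a minimal sketch of that mapping step in isolation, using a hypothetical cif_lines list:

# build the field-name -> column-index map the same way the parser does
cif_lines = [
    '_atom_site.group_PDB',
    '_atom_site.id',
    '_atom_site.Cartn_x',
    'ATOM 1 25.369',
]

fields = {}
fieldCounter = -1
for line in cif_lines:
    if line[:11] == '_atom_site.':
        fieldCounter += 1
        fields[line.split('.')[1].strip()] = fieldCounter

row = cif_lines[-1].split()
print(fields)                    # {'group_PDB': 0, 'id': 1, 'Cartn_x': 2}
print(row[fields['Cartn_x']])    # 25.369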