Пример #1
0
class exANM(ANMBase):

    """Class for explicit ANM (exANM) method ([FT00]_).
    Optional arguments build a membrane lattice permit analysis of membrane
     effect on elastic network models in *exANM* method described in [TL12]_.

    .. [TL12] Lezon TR, Bahar I, Constraints Imposed by the Membrane
       Selectively Guide the Alternating Access Dynamics of the Glutamate
       Transporter GltPh

    """

    def __init__(self, name='Unknown'):

        super(exANM, self).__init__(name)
        self._membrane = None
        self._combined = None

    def buildMembrane(self, coords, **kwargs):
        """Build Hessian matrix for given coordinate set.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg membrane_hi: the maximum z coordinate of the pdb default is 13.0
        :type membrane_hi: float

        :arg membrane_lo: the minimum z coordinate of the pdb default is -13.0
        :type membrane_lo: float

        :arg R: radius of all membrane in x-y direction default is 80. 
        :type R: float

        :arg r: radius of individual barrel-type membrane protein default is 2.5.
        :type 
        
        :arg lat: lattice type which could be FCC(face-centered-cubic)(default), 
        SC(simple cubic), SH(simple hexagonal)
        :type lat: str
        """
        if type(coords) is AtomGroup:
            buildAg = True
        else:
            buildAg = False
        
        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        pxlo = min(np.append(coords[:,0],10000))
        pxhi = max(np.append(coords[:,0],-10000))
        pylo = min(np.append(coords[:,1],10000))
        pyhi = max(np.append(coords[:,1],-10000))
        pzlo = min(np.append(coords[:,2],10000))
        pzhi = max(np.append(coords[:,2],-10000))

        membrane_hi = float(kwargs.get('membrane_hi', 13.0))
        membrane_lo = float(kwargs.get('membrane_lo', -13.0))
        R = float(kwargs.get('R', 80))
        r = float(kwargs.get('r', 5))
        lat = str(kwargs.get('lat', 'FCC'))
        lpv = assign_lpvs(lat)

        imax = (R + lpv[0,2] * (membrane_hi - membrane_lo)/2.)/r
        jmax = (R + lpv[1,2] * (membrane_hi - membrane_lo)/2.)/r
        kmax = (R + lpv[2,2] * (membrane_hi - membrane_lo)/2.)/r    

        #print pxlo, pxhi, pylo, pyhi, pzlo, pzhi
        #print lpv[0,2],lpv[1,2],lpv[2,2]
        #print R,r,imax,jmax,kmax
        membrane = zeros((1,3))

        LOGGER.timeit('_membrane')
        membrane = zeros((1,3))
        atm = 0
        for i in range(-int(imax),int(imax+1)):
            for j in range(-int(jmax),int(jmax+1)):
                for k in range(-int(kmax),int(kmax+1)):
                    X = zeros((1,3))
                    for p in range(3):
                        X[0,p]=2.*r*(i*lpv[0,p]+j*lpv[1,p]+k*lpv[2,p])
                    dd=0
                    for p in range(3):
                        dd += X[0,p] ** 2
                    if dd<R**2 and X[0,2]>membrane_lo and X[0,2]<membrane_hi:
                        if X[0,0]>pxlo-R/2 and X[0,0]<pxhi+R/2 and X[0,1]>pylo-R/2 and X[0,1]<pyhi+R/2 and X[0,2]>pzlo and X[0,2]<pzhi:
                            if checkClash(X, coords[:natoms,:], radius=5):
                                if atm == 0:
                                    membrane = X
                                else:
                                    membrane = np.append(membrane, X, axis=0)
                                atm = atm + 1 
        #print atm             

        self._membrane = AtomGroup(title="Membrane")
        self._membrane.setCoords(membrane)
        self._membrane.setResnums(range(atm))
        self._membrane.setResnames(["NE1" for i in range(atm)])
        self._membrane.setChids(["Q" for i in range(atm)])
        self._membrane.setElements(["Q1" for i in range(atm)])
        self._membrane.setNames(["Q1" for i in range(atm)])
        LOGGER.report('Membrane was built in %2.fs.', label='_membrane')

    def buildHessian(self, coords, cutoff=15., gamma=1., **kwargs):
        """Build Hessian matrix for given coordinate set.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg cutoff: cutoff distance (Å) for pairwise interactions,
            default is 15.0 Å
        :type cutoff: float

        :arg gamma: spring constant, default is 1.0
        :type gamma: float

        :arg membrane_hi: the maximum z coordinate of the pdb default is 13.0
        :type membrane_hi: float

        :arg membrane_lo: the minimum z coordinate of the pdb default is -13.0
        :type membrane_lo: float

        :arg R: radius of all membrane in x-y direction default is 80. 
        :type R: float

        :arg r: radius of individual barrel-type membrane protein default is 2.5.
        :type 
        
        :arg lat: lattice type which could be FCC(face-centered-cubic)(default), 
        SC(simple cubic), SH(simple hexagonal)
        :type lat: str
        """

        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        if self._membrane is None:
            membrane_hi = float(kwargs.get('membrane_hi', 13.0))
            membrane_lo = float(kwargs.get('membrane_lo', -13.0))
            R = float(kwargs.get('R', 80))
            r = float(kwargs.get('r', 5))
            lat = str(kwargs.get('lat', 'FCC'))
            buildMembrane(self,coords,membrane_hi=membrane_hi, membrane_lo=membrane_lo,R=R,r=r,lat=lat)


        LOGGER.timeit('_exanm')
        coords = np.concatenate((coords,self._membrane.getCoords()),axis=0)
        self._combined_coords = coords
        total_natoms = int(coords.shape[0])
        self._hessian = np.zeros((natoms*3, natoms*3), float)
        total_hessian = np.zeros((total_natoms*3, total_natoms*3), float)
        cutoff, g, gamma = checkENMParameters(cutoff, gamma)
        cutoff2 = cutoff * cutoff
        for i in range(total_natoms):
            res_i3 = i*3
            res_i33 = res_i3+3
            i_p1 = i+1
            i2j_all = coords[i_p1:, :] - coords[i]
            for j, dist2 in enumerate((i2j_all ** 2).sum(1)):
                if dist2 > cutoff2:
                    continue
                i2j = i2j_all[j]
                j += i_p1
                g = gamma(dist2, i, j)
                res_j3 = j*3
                res_j33 = res_j3+3
                super_element = np.outer(i2j, i2j) * (- g / dist2)
                total_hessian[res_i3:res_i33, res_j3:res_j33] = super_element
                total_hessian[res_j3:res_j33, res_i3:res_i33] = super_element
                total_hessian[res_i3:res_i33, res_i3:res_i33] = total_hessian[res_i3:res_i33, res_i3:res_i33] - super_element
                total_hessian[res_j3:res_j33, res_j3:res_j33] = total_hessian[res_j3:res_j33, res_j3:res_j33] - super_element

        ss = total_hessian[:natoms*3, :natoms*3]
        so = total_hessian[:natoms*3, natoms*3+1:]
        os = total_hessian[natoms*3+1:,:natoms*3]
        oo = total_hessian[natoms*3+1:, natoms*3+1:]
        self._hessian = ss - np.dot(so, np.dot(linalg.inv(oo), os))
        LOGGER.report('Hessian was built in %.2fs.', label='_exanm')
        self._dof = self._hessian.shape[0]
    
    def calcModes(self, n_modes=20, zeros=False, turbo=True):
        """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
        function to diagonalize the Hessian matrix. When Scipy is not found,
        :func:`numpy.linalg.eigh` is used.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
            If ``None`` is given, all modes will be calculated.
        :type n_modes: int or None, default is 20

        :arg zeros: If ``True``, modes with zero eigenvalues will be kept.
        :type zeros: bool, default is ``False``

        :arg turbo: Use a memory intensive, but faster way to calculate modes.
        :type turbo: bool, default is ``True``
        """

        super(exANM, self).calcModes(n_modes, zeros, turbo)

    def getMembrane(self):
        """Returns a copy of the membrane coordinates."""

        if self._membrane is not None:
            return self._membrane.copy()

    def _getMembrane(self):
        return self._membrane

    def combineMembraneProtein(self, coords):
        try:
            if type(coords) is AtomGroup:
                self._combined = coords + self._membrane
        except TypeError:
            raise TypeError('coords must be an AtomGroup object '
                                'with `getCoords` method')
    
    def writeCombinedPDB(self,filename):
        """ Given membrane coordinates it will write a pdb file with membrane coordinates. 
        :arg filename: filename for the pdb file. 
        :type filename: str

        :arg membrane: membrane coordinates or the membrane structure. 
        :type membrane: nd.array
        """
        if self._combined is None:
            combineMembraneProtein(self,coords)
        try:
            writePDB(filename,self._combined)
        except TypeError:
            raise "Membrane not found. Use buildMembrane() function."    
Пример #2
0
class exANM(ANMBase):
    """Class for explicit ANM (exANM) method ([FT00]_).
    Optional arguments build a membrane lattice permit analysis of membrane
     effect on elastic network models in *exANM* method described in [TL12]_.

    .. [TL12] Lezon TR, Bahar I, Constraints Imposed by the Membrane
       Selectively Guide the Alternating Access Dynamics of the Glutamate
       Transporter GltPh

    """
    def __init__(self, name='Unknown'):

        super(exANM, self).__init__(name)
        self._membrane = None
        self._combined = None

    def buildMembrane(self, coords, **kwargs):
        """Build Hessian matrix for given coordinate set.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg membrane_hi: the maximum z coordinate of the pdb default is 13.0
        :type membrane_hi: float

        :arg membrane_lo: the minimum z coordinate of the pdb default is -13.0
        :type membrane_lo: float

        :arg R: radius of all membrane in x-y direction default is 80. 
        :type R: float

        :arg r: radius of individual barrel-type membrane protein default is 2.5.
        :type 
        
        :arg lat: lattice type which could be FCC(face-centered-cubic)(default), 
        SC(simple cubic), SH(simple hexagonal)
        :type lat: str
        """
        if type(coords) is AtomGroup:
            buildAg = True
        else:
            buildAg = False

        try:
            coords = (coords._getCoords()
                      if hasattr(coords, '_getCoords') else coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        pxlo = min(np.append(coords[:, 0], 10000))
        pxhi = max(np.append(coords[:, 0], -10000))
        pylo = min(np.append(coords[:, 1], 10000))
        pyhi = max(np.append(coords[:, 1], -10000))
        pzlo = min(np.append(coords[:, 2], 10000))
        pzhi = max(np.append(coords[:, 2], -10000))

        membrane_hi = float(kwargs.get('membrane_hi', 13.0))
        membrane_lo = float(kwargs.get('membrane_lo', -13.0))
        R = float(kwargs.get('R', 80))
        r = float(kwargs.get('r', 5))
        lat = str(kwargs.get('lat', 'FCC'))
        lpv = assign_lpvs(lat)

        imax = (R + lpv[0, 2] * (membrane_hi - membrane_lo) / 2.) / r
        jmax = (R + lpv[1, 2] * (membrane_hi - membrane_lo) / 2.) / r
        kmax = (R + lpv[2, 2] * (membrane_hi - membrane_lo) / 2.) / r

        #print pxlo, pxhi, pylo, pyhi, pzlo, pzhi
        #print lpv[0,2],lpv[1,2],lpv[2,2]
        #print R,r,imax,jmax,kmax
        membrane = zeros((1, 3))

        LOGGER.timeit('_membrane')
        membrane = zeros((1, 3))
        atm = 0
        for i in range(-int(imax), int(imax + 1)):
            for j in range(-int(jmax), int(jmax + 1)):
                for k in range(-int(kmax), int(kmax + 1)):
                    X = zeros((1, 3))
                    for p in range(3):
                        X[0, p] = 2. * r * (i * lpv[0, p] + j * lpv[1, p] +
                                            k * lpv[2, p])
                    dd = 0
                    for p in range(3):
                        dd += X[0, p]**2
                    if dd < R**2 and X[0,
                                       2] > membrane_lo and X[0,
                                                              2] < membrane_hi:
                        if X[0, 0] > pxlo - R / 2 and X[
                                0, 0] < pxhi + R / 2 and X[
                                    0, 1] > pylo - R / 2 and X[
                                        0, 1] < pyhi + R / 2 and X[
                                            0, 2] > pzlo and X[0, 2] < pzhi:
                            if checkClash(X, coords[:natoms, :], radius=5):
                                if atm == 0:
                                    membrane = X
                                else:
                                    membrane = np.append(membrane, X, axis=0)
                                atm = atm + 1
        #print atm

        self._membrane = AtomGroup(title="Membrane")
        self._membrane.setCoords(membrane)
        self._membrane.setResnums(list(range(atm)))
        self._membrane.setResnames(["NE1" for i in range(atm)])
        self._membrane.setChids(["Q" for i in range(atm)])
        self._membrane.setElements(["Q1" for i in range(atm)])
        self._membrane.setNames(["Q1" for i in range(atm)])
        LOGGER.report('Membrane was built in %2.fs.', label='_membrane')

    def buildHessian(self, coords, cutoff=15., gamma=1., **kwargs):
        """Build Hessian matrix for given coordinate set.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg cutoff: cutoff distance (Å) for pairwise interactions,
            default is 15.0 Å
        :type cutoff: float

        :arg gamma: spring constant, default is 1.0
        :type gamma: float

        :arg membrane_hi: the maximum z coordinate of the pdb default is 13.0
        :type membrane_hi: float

        :arg membrane_lo: the minimum z coordinate of the pdb default is -13.0
        :type membrane_lo: float

        :arg R: radius of all membrane in x-y direction default is 80. 
        :type R: float

        :arg r: radius of individual barrel-type membrane protein default is 2.5.
        :type 
        
        :arg lat: lattice type which could be FCC(face-centered-cubic)(default), 
        SC(simple cubic), SH(simple hexagonal)
        :type lat: str
        """

        try:
            coords = (coords._getCoords()
                      if hasattr(coords, '_getCoords') else coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        if self._membrane is None:
            membrane_hi = float(kwargs.get('membrane_hi', 13.0))
            membrane_lo = float(kwargs.get('membrane_lo', -13.0))
            R = float(kwargs.get('R', 80))
            r = float(kwargs.get('r', 5))
            lat = str(kwargs.get('lat', 'FCC'))
            self.buildMembrane(coords,
                               membrane_hi=membrane_hi,
                               membrane_lo=membrane_lo,
                               R=R,
                               r=r,
                               lat=lat)

        LOGGER.timeit('_exanm')
        coords = np.concatenate((coords, self._membrane.getCoords()), axis=0)
        self._combined_coords = coords
        total_natoms = int(coords.shape[0])
        self._hessian = np.zeros((natoms * 3, natoms * 3), float)
        total_hessian = np.zeros((total_natoms * 3, total_natoms * 3), float)
        cutoff, g, gamma = checkENMParameters(cutoff, gamma)
        cutoff2 = cutoff * cutoff
        for i in range(total_natoms):
            res_i3 = i * 3
            res_i33 = res_i3 + 3
            i_p1 = i + 1
            i2j_all = coords[i_p1:, :] - coords[i]
            for j, dist2 in enumerate((i2j_all**2).sum(1)):
                if dist2 > cutoff2:
                    continue
                i2j = i2j_all[j]
                j += i_p1
                g = gamma(dist2, i, j)
                res_j3 = j * 3
                res_j33 = res_j3 + 3
                super_element = np.outer(i2j, i2j) * (-g / dist2)
                total_hessian[res_i3:res_i33, res_j3:res_j33] = super_element
                total_hessian[res_j3:res_j33, res_i3:res_i33] = super_element
                total_hessian[res_i3:res_i33, res_i3:res_i33] = total_hessian[
                    res_i3:res_i33, res_i3:res_i33] - super_element
                total_hessian[res_j3:res_j33, res_j3:res_j33] = total_hessian[
                    res_j3:res_j33, res_j3:res_j33] - super_element

        ss = total_hessian[:natoms * 3, :natoms * 3]
        so = total_hessian[:natoms * 3, natoms * 3 + 1:]
        os = total_hessian[natoms * 3 + 1:, :natoms * 3]
        oo = total_hessian[natoms * 3 + 1:, natoms * 3 + 1:]
        self._hessian = ss - np.dot(so, np.dot(linalg.inv(oo), os))
        LOGGER.report('Hessian was built in %.2fs.', label='_exanm')
        self._dof = self._hessian.shape[0]

    def calcModes(self, n_modes=20, zeros=False, turbo=True):
        """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
        function to diagonalize the Hessian matrix. When Scipy is not found,
        :func:`numpy.linalg.eigh` is used.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
            If ``None`` is given, all modes will be calculated.
        :type n_modes: int or None, default is 20

        :arg zeros: If ``True``, modes with zero eigenvalues will be kept.
        :type zeros: bool, default is ``False``

        :arg turbo: Use a memory intensive, but faster way to calculate modes.
        :type turbo: bool, default is ``True``
        """

        super(exANM, self).calcModes(n_modes, zeros, turbo)

    def getMembrane(self):
        """Returns a copy of the membrane coordinates."""

        if self._membrane is not None:
            return self._membrane.copy()

    def _getMembrane(self):
        return self._membrane

    def combineMembraneProtein(self, coords):
        try:
            if type(coords) is AtomGroup:
                self._combined = coords + self._membrane
        except TypeError:
            raise TypeError('coords must be an AtomGroup object '
                            'with `getCoords` method')

    def writeCombinedPDB(self, filename):
        """ Given membrane coordinates it will write a pdb file with membrane coordinates. 
        :arg filename: filename for the pdb file. 
        :type filename: str

        :arg membrane: membrane coordinates or the membrane structure. 
        :type membrane: nd.array
        """
        if self._combined is None:
            combineMembraneProtein(self, coords)
        try:
            writePDB(filename, self._combined)
        except TypeError:
            raise "Membrane not found. Use buildMembrane() function."
Пример #3
0
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be save
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain data that is present in the XML
    file and all Ligand Expo XML files do not contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')
    if isfile(cci):
        inp = openFile(cci)
        xml = inp.read()
        inp.close()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml = None
        cci = cci.upper()
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml = inp.read()
        else:
            path = None
        #url = ('http://ligand-expo.rcsb.org/reports/{0[0]}/{0}/{0}'
        #       '.xml'.format(cci.upper()))
        url = 'http://www.pdb.org/pdb/files/ligand/{0}.xml'.format(cci.upper())
        if not xml:
            #'http://www.pdb.org/pdb/files/ligand/{0}.xml'
            try:
                inp = openURL(url)
            except IOError:
                raise IOError('XML file for ligand {0} is not found online'
                              .format(cci))
            else:
                xml = inp.read()
                inp.close()
            if filename:
                out = openFile(filename, mode='w', folder=folder)
                out.write(xml)
                out.close()
            if SETTINGS.get('ligand_xml_save'):
                with openFile(xmlgz, 'w') as out:
                    out.write(xml)

    import xml.etree.cElementTree as ET

    root = ET.XML(xml)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    ns = root.tag[:root.tag.rfind('}')+1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text
    dict_['formula_weight'] = float(dict_.get('formula_weight'))

    identifiers_and_descriptors = []
    results = root.find(ns + 'pdbx_chem_comp_identifierCategory')
    if results:
        identifiers_and_descriptors.extend(results)
    results = root.find(ns + 'pdbx_chem_comp_descriptorCategory')
    if results:
        identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    dict_['audits'] = [(audit.get('action_type'), audit.get('date'))
                       for audit in
                       list(root.find(ns + 'pdbx_chem_comp_auditCategory'))]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    leaving_atom_flags = np.zeros(n_atoms, np.bool)
    aromatic_flags = np.zeros(n_atoms, np.bool)
    stereo_configs = np.zeros(n_atoms, np.bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = dict([(child.tag[len_ns:], child.text) for child in list(atom)])

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_atomatic_flag') == 'Y'
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    for bond in list(root.find(ns + 'chem_comp_bondCategory') or bonds):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_
Пример #4
0
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be save
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain data that is present in the XML
    file and all Ligand Expo XML files do not contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')
    if isfile(cci):
        inp = openFile(cci)
        xml = inp.read()
        inp.close()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml = None
        cci = cci.upper()
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml = inp.read()
        else:
            path = None
        #url = ('http://ligand-expo.rcsb.org/reports/{0[0]}/{0}/{0}'
        #       '.xml'.format(cci.upper()))
        url = 'http://files.rcsb.org/ligands/download/{0}.xml'.format(
            cci.upper())
        if not xml:
            #'http://www.pdb.org/pdb/files/ligand/{0}.xml'
            try:
                inp = openURL(url)
            except IOError:
                raise IOError(
                    'XML file for ligand {0} is not found online'.format(cci))
            else:
                xml = inp.read()
                inp.close()
            if filename:
                out = openFile(filename, mode='w', folder=folder)
                out.write(xml)
                out.close()
            if SETTINGS.get('ligand_xml_save'):
                with openFile(xmlgz, 'w') as out:
                    out.write(xml)

    import xml.etree.cElementTree as ET

    root = ET.XML(xml)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    ns = root.tag[:root.tag.rfind('}') + 1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text
    dict_['formula_weight'] = float(dict_.get('formula_weight'))

    identifiers_and_descriptors = []
    results = root.find(ns + 'pdbx_chem_comp_identifierCategory')
    if results:
        identifiers_and_descriptors.extend(results)
    results = root.find(ns + 'pdbx_chem_comp_descriptorCategory')
    if results:
        identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    dict_['audits'] = [
        (audit.get('action_type'), audit.get('date'))
        for audit in list(root.find(ns + 'pdbx_chem_comp_auditCategory'))
    ]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    leaving_atom_flags = np.zeros(n_atoms, np.bool)
    aromatic_flags = np.zeros(n_atoms, np.bool)
    stereo_configs = np.zeros(n_atoms, np.bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = dict([(child.tag[len_ns:], child.text) for child in list(atom)])

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_atomatic_flag') == 'Y'
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    for bond in list(root.find(ns + 'chem_comp_bondCategory') or bonds):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_