Example #1
    def getResnums(self, gaps=False):
        """Return list of residue numbers associated with non-gapped *seq*.
        When *gaps* is **True**, return a list containing the residue numbers
        with gaps appearing as **None**.  Residue numbers are inferred from the
        full label.  When the label does not contain residue number
        information, a range of numbers starting from 1 is returned."""

        title, start, end = splitSeqLabel(self.getLabel(True))
        try:
            start, end = int(start), int(end)
        except (ValueError, TypeError):
            LOGGER.info('Cannot parse label start and end values, setting '
                        'resnums 1 to {0:d}'.format(self.numResidues()))
            start, end = 1, self.numResidues()
        else:
            if (end - start + 1) != self.numResidues():
                LOGGER.info('Label start-end position does not match '
                            'length of ungapped sequence. Setting '
                            'resnums 1 to {0:d}'.format(self.numResidues()))
                start, end = 1, self.numResidues()

        resnums = iter(range(start, end + 1))
        if gaps:
            return [next(resnums) if torf else None
                    for torf in char.isalpha(self._array)]
        else:
            return list(resnums)
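
A minimal usage sketch for the method above (the MSA filename is illustrative; in ProDy, indexing an MSA returns a Sequence instance):

from prody import parseMSA

msa = parseMSA('piwi_seed.sth')       # filename is illustrative
seq = msa[0]                          # a Sequence instance
print(seq.getResnums())               # residue numbers of ungapped positions
print(seq.getResnums(gaps=True))      # gap positions reported as None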
Example #2
def deformAtoms(atoms, mode, rmsd=None):
    """Generate a new coordinate set for *atoms* along the *mode*.  *atoms*
    must be a :class:`.AtomGroup` instance.  New coordinate set will be
    appended to *atoms*. If *rmsd* is provided, *mode* will be scaled to
    generate a coordinate set with given RMSD distance to the active coordinate
    set."""

    if not isinstance(atoms, AtomGroup):
        raise TypeError('atoms must be an AtomGroup, not {0}'
                        .format(type(atoms)))
    if not isinstance(mode, VectorBase):
        raise TypeError('mode must be a Mode or Vector instance, '
                        'not {0}'.format(type(mode)))
    if not mode.is3d():
        raise ValueError('mode must be from a 3-dimensional model.')
    if atoms.numAtoms() != mode.numAtoms():
        raise ValueError('numbers of atoms do not match')

    array = mode.getArrayNx3()

    if rmsd is not None:
        rmsd = float(rmsd)
        # rmsd = ( ((scalar * array)**2).sum() / n_atoms )**0.5
        scalar = (atoms.numAtoms() * rmsd**2 / (array**2).sum())**0.5
        LOGGER.info('Mode is scaled by {0}.'.format(scalar))
        atoms.addCoordset(atoms.getCoords() + array * scalar)
    else:
        atoms.addCoordset(atoms.getCoords() + array)
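
A short sketch of driving deformAtoms with an ANM mode (the PDB ID and RMSD value are illustrative):

from prody import parsePDB, ANM, deformAtoms

calphas = parsePDB('1ubi', subset='ca')   # AtomGroup of C-alpha atoms
anm = ANM('1ubi')
anm.buildHessian(calphas)
anm.calcModes()
deformAtoms(calphas, anm[0], rmsd=1.5)    # append a set displaced 1.5 A RMSD
print(calphas.numCoordsets())             # original set plus the deformed one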
Example #3
    def _superpose(self, **kwargs):
        """Superpose conformations and update coordinates."""

        calcT = getTransformation
        if kwargs.get('trans', False):
            if self._trans is not None:
                LOGGER.info('Existing transformations will be overwritten.')
            trans = np.zeros((self._n_csets, 4, 4))
        else:
            trans = None
        indices = self._indices
        if indices is None:
            weights = self._weights
            coords = self._coords
            confs = self._confs
            confs_selected = self._confs
        else:
            weights = self._weights[:, indices]
            coords = self._coords[indices]
            confs = self._confs
            confs_selected = self._confs[:, indices]

        for i, conf in enumerate(confs_selected):
            rmat, tvec = calcT(conf, coords, weights[i])
            if trans is not None:
                trans[i][:3, :3] = rmat
                trans[i][:3, 3] = tvec
            confs[i] = tvec + np.dot(confs[i], rmat.T)
        self._trans = trans
Example #4
File: cath.py Project: fongchun/ProDy
    def parsePDBs(self, **kwargs):
        """Load PDB into memory as :class:`.AtomGroup` instances using :func:`.parsePDB` and 
        perform selection based on residue ranges given by CATH."""
        
        pdbs = self.getPDBs(True)
        selstrs = self.getSelStrs()
        header = kwargs.get('header', False)
        model = kwargs.get('model', None)

        LOGGER.timeit('_cath_parsePDB')
        LOGGER.info('Parsing {0} PDB files...'.format(len(pdbs)))
        ret = parsePDB(*pdbs, **kwargs)

        if model != 0:
            if header:
                prots, _ = ret
            else:
                prots = ret

            LOGGER.info('Extracting domains...')
            for i in range(len(prots)):
                sel = prots[i].select(selstrs[i])
                prots[i] = sel
        LOGGER.report('CATH domains are parsed and extracted in %.2fs', '_cath_parsePDB')

        return ret
Example #5
    def iterpose(self, rmsd=0.0001):
        """Iteratively superpose the ensemble until convergence to *rmsd*,
        restore the original conformations, and superpose them once more
        onto the refined reference coordinates."""

        confs = self._confs.copy()
        Ensemble.iterpose(self, rmsd)
        self._confs = confs
        LOGGER.info('Final superposition to calculate transformations.')
        self.superpose()
Example #6
def loadPDBClusters(sqid=None):
    """Load previously fetched PDB sequence clusters from disk to memory."""

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if sqid is None:
        sqid_list = list(PDB_CLUSTERS)
        LOGGER.info('Loading all PDB sequence clusters.')
    else:
        assert isinstance(sqid, int), 'sqid must be an integer' 
        if sqid not in PDB_CLUSTERS:
            raise ValueError('PDB cluster data is not available for sequence '
                             'identity {0}%, try one of {1}'
                             .format(sqid, PDB_CLUSTERS_SQID_STR))
        LOGGER.info('Loading PDB sequence clusters for sequence identity '
                    '{0}.'.format(sqid))
        sqid_list = [sqid]
    global PDB_CLUSTERS_UPDATE_WARNING
    for sqid in sqid_list:
        filename = os.path.join(PDB_CLUSTERS_PATH, 
                                'bc-{0}.out.gz'.format(sqid))
        if not os.path.isfile(filename):
            fetchPDBClusters(sqid)
            
        if PDB_CLUSTERS_UPDATE_WARNING:
            import time
            diff = (time.time() - os.path.getmtime(filename)) / 604800.  # 604800 s = 1 week
            if diff > 1.:
                LOGGER.warning('PDB sequence clusters are {0:.1f} week(s) old,'
                               ' call `fetchPDBClusters` to receive updates.'
                               .format(diff))
                PDB_CLUSTERS_UPDATE_WARNING = False
        inp = openFile(filename)
        PDB_CLUSTERS[sqid] = inp.read()
        inp.close()
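
Typical use, assuming one of the standard sequence-identity levels (e.g. 90%) is available from the clustering service:

from prody import fetchPDBClusters, loadPDBClusters

fetchPDBClusters(90)   # fetch bc-90.out.gz if not already cached
loadPDBClusters(90)    # load the 90% sequence-identity clusters into memory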
Example #7
def psiBlastRun(sequence, cycles=2, filename=None, **kwargs):
    """Returns the results from a full PSI-BLAST run (multiple cycles).
    All arguments are the same as psiBlastCycle and are passed to it
    except for cycles.

    :arg cycles: the number of cycles to run
        default is 2
    :type cycles: int
    """
    psithr = kwargs.get('psithr', 1.0e-3)
    job_id = kwargs.get('previousjobid','') 
    selectedHits = kwargs.get('selectedHits','')

    cycles_done = 0
    results_list = []
    job_ids = []
    while cycles_done < cycles:
        if cycles_done > 0:
            selectedHits = 'http://www.ebi.ac.uk/Tools/services/rest/psiblast/result/' \
                      + job_id + '/preselected_seq'
            sequence = None
        job_id, results, sequence = psiBlastCycle(sequence, filename, \
                                                 previousjobid=job_id, \
                                                 selectedHits=selectedHits, \
                                                 cycle=cycles_done, **kwargs)
        results_list.append(results)
        job_ids.append(job_id)
        cycles_done += 1
        LOGGER.info('Finished cycle {0} with job ID {1}.'.format(cycles_done, job_id))

    return job_ids, results_list, sequence
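
A hedged usage sketch (the sequence is illustrative, and the import assumes psiBlastRun is exported at the package level; results depend on the EBI PSI-BLAST service):

from prody import psiBlastRun   # assumed top-level export

sequence = 'MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ'   # illustrative sequence
job_ids, results_list, final_seq = psiBlastRun(sequence, cycles=3)
print(job_ids)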
Example #8
File: localpdb.py Project: sixpi/ProDy
def pathPDBMirror(path=None, format=None):
    """Returns or specify PDB mirror path to be used by :func:`.fetchPDB`.
    To release the current mirror, pass an invalid path, e.g. ``path=''``.
    If you are keeping a partial mirror, such as PDB files in
    :file:`/data/structures/divided/pdb/` folder, specify *format*, which is
    ``'pdb'`` in this case."""

    if path is None:
        path = SETTINGS.get('pdb_mirror_path')
        format = SETTINGS.get('pdb_mirror_format', None)
        if path:
            if isdir(path):
                if format is None:
                    return path
                else:
                    return path, format
            else:
                LOGGER.warning('PDB mirror path {0} is not accessible.'
                               .format(repr(path)))
    else:
        if isdir(path):
            path = abspath(path)
            LOGGER.info('Local PDB mirror path is set: {0}'
                        .format(repr(path)))
            SETTINGS['pdb_mirror_path'] = path
            SETTINGS['pdb_mirror_format'] = format
            SETTINGS.save()
        else:
            current = SETTINGS.pop('pdb_mirror_path')
            if current:
                LOGGER.info('PDB mirror {0} is released.'
                            .format(repr(current)))
                SETTINGS.save()
            else:
                raise IOError('{0} is not a valid path.'.format(repr(path)))
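
Usage sketch following the docstring (the mirror path is illustrative and must be an existing directory):

from prody import pathPDBMirror

pathPDBMirror('/data/structures/divided/pdb', format='pdb')  # set a partial mirror
print(pathPDBMirror())   # returns (path, format) once a format is stored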
Example #9
File: wwpdb.py Project: fongchun/ProDy
def wwPDBServer(*key):
    """Set/get `wwPDB`_ FTP/HTTP server location used for downloading PDB
    structures.  Use one of the following keywords for setting a server:

    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+

    .. _wwPDB: http://www.wwpdb.org/"""

    if not key:
        return SETTINGS.get('wwpdb', None)
    elif len(key) == 1:
        try:
            key = key[0].lower()
        except AttributeError:
            raise TypeError('key must be a string')
        if key in WWPDB_FTP_SERVERS:
            SETTINGS['wwpdb'] = key
            SETTINGS.save()
            LOGGER.info('wwPDB server is set to {}.'
                        .format(WWPDB_FTP_SERVERS[key][0]))
        else:
            raise ValueError('{0} is not a valid wwPDB server identifier'
                             .format(repr(key)))
    else:
        raise TypeError('one wwPDB server identifier is expected, {0} given'
                        .format(len(key)))
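
Usage sketch (keys are case insensitive, per the table above):

from prody import wwPDBServer

wwPDBServer('PDBe')    # switch downloads to the European server
print(wwPDBServer())   # currently configured server key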
Example #10
def calcCrossProjection(ensemble, mode1, mode2, scale=None, **kwargs):
    """Return projection of conformational deviations onto modes from
    different models.

    :arg ensemble: ensemble for which deviations will be projected
    :type ensemble: :class:`.Ensemble`
    :arg mode1: normal mode to project conformations onto
    :type mode1: :class:`.Mode`, :class:`.Vector`
    :arg mode2: normal mode to project conformations onto
    :type mode2: :class:`.Mode`, :class:`.Vector`
    :arg scale: scale the width of the projection onto mode ``x`` or ``y``;
        the best scaling factor will be calculated and printed on the console;
        the absolute value of the scalar makes the widths of the two
        projections the same, and its sign makes the projections yield a
        positive correlation"""

    if not isinstance(ensemble, (Ensemble, Conformation, Vector, TrajBase)):
        raise TypeError('ensemble must be Ensemble, Conformation, Vector, '
                        'or a Trajectory, not {0}'.format(type(ensemble)))
    if not isinstance(mode1, VectorBase):
        raise TypeError('mode1 must be a Mode instance, not {0}'
                        .format(type(mode1)))
    if not mode1.is3d():
        raise ValueError('mode1 must be 3-dimensional')
    if not isinstance(mode2, VectorBase):
        raise TypeError('mode2 must be a Mode instance, not {0}'
                        .format(type(mode2)))
    if not mode2.is3d():
        raise ValueError('mode2 must be 3-dimensional')

    if scale is not None:
        assert isinstance(scale, str), 'scale must be a string'
        scale = scale.lower()
        assert scale in ('x', 'y'), 'scale must be x or y'

    xcoords = calcProjection(ensemble, mode1, kwargs.get('rmsd', True))
    ycoords = calcProjection(ensemble, mode2, kwargs.pop('rmsd', True))
    if scale:
        scalar = kwargs.get('scalar', None)
        if scalar:
            assert isinstance(scalar, (float, int)), 'scalar must be a number'
        else:
            scalar = ((ycoords.max() - ycoords.min()) /
                      (xcoords.max() - xcoords.min())
                      ) * np.sign(np.dot(xcoords, ycoords))
            if scale == 'x':
                LOGGER.info('Projection onto {0} is scaled by {1:.2f}'
                            .format(mode1, scalar))
            else:
                scalar = 1 / scalar
                LOGGER.info('Projection onto {0} is scaled by {1:.2f}'
                            .format(mode2, scalar))

        if scale == 'x':
            xcoords = xcoords * scalar
        else:
            ycoords = ycoords * scalar

    return xcoords, ycoords
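
A sketch of a cross-projection between a PCA mode and an ANM mode (the structure and mode indices are illustrative):

from prody import parsePDB, Ensemble, PCA, ANM, calcCrossProjection

ubi = parsePDB('2k39', subset='ca')   # NMR models become coordinate sets
ensemble = Ensemble(ubi)
ensemble.iterpose()

pca = PCA('2k39')
pca.buildCovariance(ensemble)
pca.calcModes()

anm = ANM('2k39')
anm.buildHessian(ubi)
anm.calcModes()

x, y = calcCrossProjection(ensemble, pca[0], anm[0], scale='y')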
Example #11
def pathVMD(*path):
    """Return VMD path, or set it to be a user specified *path*."""

    if not path:
        path = SETTINGS.get('vmd', None)
        if isExecutable(path):
            return path
        else:
            LOGGER.warning('VMD path is not set by user, looking for it.')

            vmdbin = None
            vmddir = None
            if PLATFORM == 'Windows':
                if PY3K:
                    import winreg
                else:
                    import _winreg as winreg  # PY3K: OK
                for vmdversion in ('1.8.7', '1.9', '1.9.1'):
                    try:
                        key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                'Software\\University of Illinois\\VMD\\' +
                                vmdversion)
                        vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                        vmdbin = join(vmddir, 'vmd.exe')
                    except OSError:
                        pass
                    try:
                        key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                    'Software\\WOW6432node\\University of Illinois\\VMD\\' +
                    vmdversion)
                        vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                        vmdbin = join(vmddir, 'vmd.exe')
                    except OSError:
                        pass
            else:
                vmdbin = which('vmd')
            if isExecutable(vmdbin):
                setVMDpath(vmdbin)
                return vmdbin
    elif len(path) == 1:
        path = path[0]
        if isExecutable(path):
            SETTINGS['vmd'] = path
            SETTINGS.save()
            LOGGER.info("VMD path is set to '{0}'.".format(path))
        else:
            raise OSError('{0} is not executable.'.format(str(path)))
    else:
        raise ValueError('specify a single path string')
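
Usage sketch (the path is illustrative and must point to an executable):

from prody import pathVMD

pathVMD('/usr/local/bin/vmd')   # set the VMD executable path
print(pathVMD())                # return the stored path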
Example #12
def setVMDpath(path):
    """Set path to a VMD executable."""

    if isExecutable(path):
        SETTINGS["vmd"] = path
        SETTINGS.save()
        LOGGER.info("VMD path is set to '{0:s}'.".format(path))
    else:
        raise OSError("{0:s} is not executable.".format(str(path)))
Example #13
File: pdbfile.py Project: prody/ProDy
def _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames):
    altloc_keys = list(altloc)
    altloc_keys.sort()
    indices = {}
    for key in altloc_keys:
        xyz = atomgroup.getCoords()
        success = 0
        lines = altloc[key]
        for line, i in lines:
            aan = line[12:16].strip()
            arn = line[17:21].strip()
            ach = line[21]
            ari = int(line[22:26].split()[0])
            rn, ids, ans = indices.get((ach, ari), (None, None, None))
            if ids is None:
                ids = indices.get(ach, None)
                if ids is None:
                    ids = (chainids == ach).nonzero()[0]
                    indices[ach] = ids
                ids = ids[resnums[ids] == ari]
                if len(ids) == 0:
                    LOGGER.warn("failed to parse altloc {0} at line {1}, "
                                "residue not present for altloc 'A'".format(
                                repr(key), i+1))
                    continue
                rn = resnames[ids[0]]
                ans = atomnames[ids]
                indices[(ach, ari)] = (rn, ids, ans)
            if rn != arn:
                LOGGER.warn("failed to parse altloc {0} at line {1}, "
                            "residue name mismatch (expected {2}, "
                            "parsed {3})".format(repr(key), i+1, repr(rn),
                                                   repr(arn)))
                continue
            index = ids[(ans == aan).nonzero()[0]]
            if len(index) != 1:
                LOGGER.warn("failed to parse altloc {0} at line {1}, atom"
                            " {2} not found in the residue"
                            .format(repr(key), i+1, repr(aan)))
                continue
            try:
                xyz[index[0], 0] = float(line[30:38])
                xyz[index[0], 1] = float(line[38:46])
                xyz[index[0], 2] = float(line[46:54])
            except:
                LOGGER.warn('failed to parse altloc {0} at line {1}, could'
                            ' not read coordinates'.format(repr(key), i+1))
                continue
            success += 1
        LOGGER.info('{0} out of {1} altloc {2} lines were parsed.'
                    .format(success, len(lines), repr(key)))
        if success > 0:
            LOGGER.info('Altloc {0} is appended as a coordinate set to '
                        'atomgroup {1}.'.format(repr(key), atomgroup.getTitle()))
            atomgroup.addCoordset(xyz, label='altloc ' + key)
Example #14
def showMeanMechStiff(model, coords, header, chain='A', *args, **kwargs):
    """Show mean value of effective spring constant with secondary structure
    taken from MechStiff. Header is needed to obatin secondary structure range.
    Using ``'jet_r'`` as argument color map will be reverse (similar to VMD 
    program coding).
    """
    meanStiff = np.array([np.mean(model.getStiffness(), axis=0)])
    import matplotlib
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
    fig=plt.figure(figsize=[18,6], facecolor='w', dpi=100)
    
    if kwargs.pop('jet_r', None) is not None:
        import matplotlib.cm as cm
        kwargs['cmap'] = cm.jet_r
    if kwargs.pop('nearest', None) is not None:
        kwargs['interpolation'] = 'nearest'

    with plt.style.context('fivethirtyeight'):
        ax = fig.add_subplot(111)
        matplotlib.rcParams['font.size'] = '24'
        plt.plot(np.arange(len(meanStiff[0]))+coords.getResnums()[0],meanStiff[0], 'k-', linewidth = 3)
        plt.xlim(coords.getResnums()[0], coords.getResnums()[-1])
        ax_top=round(np.max(meanStiff[0])+((np.max(meanStiff[0])-np.min(meanStiff[0]))/3))
        ax_bottom=np.floor(np.min(meanStiff[0]))
        LOGGER.info('The range of mean effective force constant is: {0} to {1}.'
                                           .format(min(meanStiff[0]), max(meanStiff[0])))
        plt.ylim(ax_bottom,ax_top)
        plt.xlabel('residue', fontsize = '22')
        plt.ylabel(r'mean $\kappa$ [a.u.]', fontsize='22')

    ax = fig.add_subplot(411, aspect='equal')
    plt.imshow(meanStiff, *args, **kwargs)
    header_ss = header['sheet_range'] + header['helix_range']
    for i in range(len(header_ss)):
        if header_ss[i][1] == chain:
            beg = int(header_ss[i][-2])-coords.getResnums()[0]
            end = int(header_ss[i][-1])-coords.getResnums()[0]
            add_beg = end - beg
            if header_ss[i][0] == 'H':
                ax.add_patch(patches.Rectangle((beg-1,-0.7),add_beg,\
                1.4,fill=False, linestyle='solid',edgecolor='#b22683', linewidth=2))    
            elif header_ss[i][0] == 'E':
                if header_ss[i][2] == -1:    
                    ax.add_patch(patches.Arrow(beg-1,0,add_beg,0,width=4.65, \
                    fill=False, linestyle='solid',edgecolor='black', linewidth=2))
                else: 
                    ax.add_patch(patches.Arrow(end-1,0,add_beg*(-1),0,width=4.65, \
                    fill=False, linestyle='solid',edgecolor='black', linewidth=2))
    plt.axis('off')
    ax.set_ylim(-1.7,1.7)
    if SETTINGS['auto_show']:
        showFigure()
    return plt.show
Example #15
def setPDBMirrorPath(path):
    """Set the path to a local PDB mirror."""
    
    if not isinstance(path, str):
        raise TypeError('path must be a string')
    if isdir(path):
        path = abspath(path)
        LOGGER.info('Local PDB mirror path is set: {0:s}'.format(repr(path)))
        SETTINGS['pdb_mirror_path'] = path
        SETTINGS.save()
    else:
        raise IOError('No such directory: {0:s}'.format(repr(path)))
Example #16
File: analysis.py Project: npabon/ProDy
def calcRankorder(matrix, zscore=False, **kwargs):
    """Returns indices of elements and corresponding values sorted in
    descending order, if *descend* is **True** (default). Can apply a zscore
    normalization; by default along *axis* - 0 such that each column has
    mean=0 and std=1.  If *zcore* analysis is used, return value contains the
    zscores. If matrix is smymetric only lower triangle indices will be
    returned, with diagonal elements if *diag* is **True** (default)."""

    try:
        ndim, shape = matrix.ndim, matrix.shape
    except AttributeError:
        raise TypeError('matrix must be a 2D array')

    if ndim != 2:
        raise ValueError('matrix must be a 2D array')

    threshold = kwargs.get('threshold', 0.0001)
    try:
        symm = abs((matrix.transpose() - matrix).max()) < threshold
    except Exception:
        symm = False

    if zscore:
        axis = int(bool(kwargs.get('axis', 0)))
        matrix = (matrix - matrix.mean(axis)) / matrix.std(axis)
        LOGGER.info('Zscore normalization has been applied.')

    descend = kwargs.get('descend', True)
    if not symm:
        if descend:
            sorted_index = matrix.argsort(axis=None)[::-1]
        else:
            sorted_index = matrix.argsort(axis=None)
        row = indices(shape)[0].flatten()[sorted_index]
        column = indices(shape)[1].flatten()[sorted_index]
    else:
        LOGGER.info('Matrix is symmetric, only lower triangle indices '
                    'will be returned.')
        if kwargs.get('diag', True):
            k = 0
        else:
            k = -1
        ind_row, ind_column = tril_indices(shape[0], k=k)
        matrix_lt = matrix[ind_row, ind_column]
        if descend:
            sorted_index = matrix_lt.argsort(axis=None)[::-1]
        else:
            sorted_index = matrix_lt.argsort(axis=None)
        row = ind_row[sorted_index]
        column = ind_column[sorted_index]

    return (row, column, matrix[row, column])
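
A small self-contained sketch (assuming calcRankorder is importable from the package; the matrix is illustrative):

import numpy as np
from prody import calcRankorder   # assumed top-level export

m = np.array([[4., 1.], [1., 3.]])        # symmetric, so lower triangle is used
row, col, values = calcRankorder(m, descend=True)
print(row, col, values)                   # indices and values, largest first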
Example #17
def calcMechStiff(modes, coords, kbt=1.):
    """Calculate stiffness matrix calculated using :class:`.ANM` instance. 
    Method described in [EB08]_. 

    :arg coords: a coordinate set or an object with ``getCoords`` method
    :type coords: :class:`numpy.ndarray`.

    :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
        If **None** is given, all modes will be calculated (3x number of atoms).
    :type n_modes: int or **None**, default is 20.
    
    Author: Mustafa Tekpinar & Karolina Mikulska-Ruminska & Cihan Kaya
    """

    try:
        coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                    coords.getCoords())
    except AttributeError:
        try:
            checkCoords(coords)
        except TypeError:
            raise TypeError('coords must be a Numpy array or an object '
                            'with `getCoords` method')
    try:
        is3d = modes.is3d()
        eigvecs = modes.getArray().T.flatten()
        eigvals = modes.getEigvals()
    except AttributeError:
        raise TypeError('modes must be either an NMA or ModeSet object')

    if not is3d:
        raise TypeError('modes must be 3-dimensional')

    n_atoms = modes.numAtoms()
    n_modes = modes.numModes()
    
    LOGGER.timeit('_sm')

    sm = np.zeros((n_atoms, n_atoms), np.double)
    from .smtools import calcSM
    LOGGER.info('Calculating stiffness matrix.')

    calcSM(coords, sm, eigvecs, eigvals,
            n_atoms, n_modes, float(kbt))

    LOGGER.report('Stiffness matrix calculated in %.2lfs.', label='_sm')
    
    LOGGER.info('The range of effective force constant is: {0} to {1}.'
                                .format(*calcStiffnessRange(sm)))

    return sm
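
Sketch of a typical call (the PDB ID is illustrative; all modes are calculated first, as the stiffness method expects a full 3-D model):

from prody import parsePDB, ANM, calcMechStiff

calphas = parsePDB('1ubi', subset='ca')
anm = ANM('1ubi')
anm.buildHessian(calphas)
anm.calcModes(n_modes=None)    # None calculates all modes
stiffness = calcMechStiff(anm, calphas)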
Example #18
    def buildMechStiff(self, coords, n_modes=None, kbt=1.):
        """Calculate the stiffness matrix using this :class:`.ANM` instance.
        Method described in [EB08]_.

        .. [EB08] Eyal E., Bahar I. Toward a Molecular Understanding of
            the Anisotropic Response of Proteins to External Forces:
            Insights from Elastic Network Models. *Biophys J* **2008** 94:3424-3435.
    
        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`.
        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
            If ``None`` is given, all modes will be calculated (3x number of atoms).
        :type n_modes: int or ``None``, default is 20.
        
        Author: Mustafa Tekpinar & Karolina Mikulska-Ruminska & Cihan Kaya
        """

        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')
        n_atoms = natoms = self._n_atoms
        n_modes = 3 * n_atoms

        self.calcModes(n_modes=None, zeros=True)
        
        LOGGER.timeit('_sm')
        eigvecs = (np.transpose(self._array)).flatten()
        eigvals = np.transpose(self._eigvals)
        natoms = n_atoms

        sm = np.zeros((n_atoms, n_atoms), np.double)
        from .smtools import calcSM
        LOGGER.info('Calculating stiffness matrix.')

        calcSM(coords, sm, eigvecs, eigvals,
                natoms, n_modes, float(kbt))

        LOGGER.report('Stiffness matrix calculated in %.2lfs.', label='_sm')

        self._stiffness = sm
        
        LOGGER.info('The range of effective force constant is: {0} to {1}.'
                                   .format(np.min(sm[np.nonzero(sm)]), np.amax(sm)))
Example #19
File: pca.py Project: fongchun/ProDy
    def calcModes(self, n_modes=20, turbo=True):
        """Calculate principal (or essential) modes.  This method uses
        :func:`scipy.linalg.eigh`, or :func:`numpy.linalg.eigh`, function
        to diagonalize the covariance matrix.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate,
            default is 20,
            if **None** or ``'all'`` is given, all modes will be calculated
        :type n_modes: int

        :arg turbo: when available, use a memory intensive but faster way to
            calculate modes, default is **True**
        :type turbo: bool"""
        
        linalg = importLA()
        if self._cov is None:
            raise ValueError('covariance matrix is not built or set')
        start = time.time()
        dof = self._dof
        self._clear()
        if str(n_modes).lower() == 'all':
            n_modes = None
        if linalg.__package__.startswith('scipy'):
            if n_modes is None:
                eigvals = None
                n_modes = dof
            else:
                n_modes = int(n_modes)
                if n_modes >= self._dof:
                    eigvals = None
                    n_modes = dof
                else:
                    eigvals = (dof - n_modes, dof - 1)
            values, vectors = linalg.eigh(self._cov, turbo=turbo,
                                          eigvals=eigvals)
        else:
            if n_modes is not None:
                LOGGER.info('Scipy is not found, all modes are calculated.')
            values, vectors = linalg.eigh(self._cov)
        # Order by descending SV
        revert = list(range(len(values)-1, -1, -1))
        values = values[revert]
        vectors = vectors[:, revert]
        which = values > 1e-8
        self._eigvals = values[which]
        self._array = vectors[:, which]
        self._vars = self._eigvals
        self._n_modes = len(self._eigvals)
        LOGGER.debug('{0} modes were calculated in {1:.2f}s.'
                     .format(self._n_modes, time.time()-start))
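
The method above is usually reached through the standard PCA workflow (the structure and mode count are illustrative):

from prody import parsePDB, Ensemble, PCA

ubi = parsePDB('2k39', subset='ca')   # NMR models become coordinate sets
ensemble = Ensemble(ubi)
ensemble.iterpose()

pca = PCA('2k39')
pca.buildCovariance(ensemble)
pca.calcModes(n_modes=20)
print(pca.numModes())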
Example #20
File: gnm.py Project: sixpi/ProDy
    def getNormDistFluct(self, coords):
        """Normalized distance fluctuation
        """
            
        model = self.getModel()
        LOGGER.info('Number of chains: {0}, chains: {1}.'
                     .format(len(list(set(coords.getChids()))), \
                                 list(set(coords.getChids()))))

        try:
            #coords = coords.select('protein and name CA')
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                                'with `getCoords` method')
        
        if not isinstance(model, NMA):
            LOGGER.info('Calculating new model')
            model = GNM('prot analysis')
            model.buildKirchhoff(coords)
            model.calcModes() 
            
        linalg = importLA()
        n_atoms = model.numAtoms()
        n_modes = model.numModes()
        LOGGER.timeit('_ndf')
    
        from .analysis import calcCrossCorr
        from numpy import linalg as LA
        # <dRi, dRi>, <dRj, dRj> = 1
        crossC = 2-2*calcCrossCorr(model)
        r_ij = np.zeros((n_atoms,n_atoms,3))

        for i in range(n_atoms):
            for j in range(i+1, n_atoms):
                r_ij[i][j] = coords[j,:] - coords[i,:]
                r_ij[j][i] = r_ij[i][j]
        # compute all pairwise distances once, outside the loops
        r_ij_n = LA.norm(r_ij, axis=2)

        #with np.errstate(divide='ignore'):
        r_ij_n[np.diag_indices_from(r_ij_n)] = 1e-5  # div by 0
        crossC=abs(crossC)
        normdistfluct = np.divide(np.sqrt(crossC),r_ij_n)
        LOGGER.report('NDF calculated in %.2lfs.', label='_ndf')
        normdistfluct[np.diag_indices_from(normdistfluct)] = 0  # div by 0
        return normdistfluct
Example #21
def parseDCD(filename, start=None, stop=None, step=None):
    """Parse CHARMM format DCD files (also NAMD 2.1 and later).  Returns an 
    :class:`Ensemble` instance. Conformations in the ensemble will be ordered 
    as they appear in the trajectory file.  Use :class:`DCDFile` class for 
    parsing  coordinates of a subset of atoms.
    
    :arg filename: DCD filename
    :type filename: str
    
    :arg start: index of first frame to read
    :type start: int
        
    :arg stop: index of the frame at which reading stops (exclusive)
    :type stop: int
        
    :arg step: steps between reading frames, default is 1 meaning every frame
    :type step: int"""
    
    dcd = DCDFile(filename)
    time_ = time()
    n_frames = dcd.numFrames()
    LOGGER.info('DCD file contains {0:d} coordinate sets for {1:d} atoms.'
                .format(n_frames, dcd.numAtoms()))
    ensemble = dcd[slice(start,stop,step)]    
    dcd.close()
    time_ = time() - time_ or 0.01
    dcd_size = 1.0 * dcd.numFrames() * dcd._bytes_per_frame / (1024*1024)
    LOGGER.info('DCD file was parsed in {0:.2f} seconds.'.format(time_))
    LOGGER.info('{0:.2f} MB parsed at input rate {1:.2f} MB/s.'
                .format(dcd_size, dcd_size/time_))
    LOGGER.info('{0:d} coordinate sets parsed at input rate {1:d} frame/s.'
                .format(n_frames, int(n_frames/time_)))
    return ensemble
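
Usage sketch (the DCD filename is illustrative):

from prody import parseDCD

ensemble = parseDCD('mdm2.dcd', step=10)   # read every 10th frame
print(ensemble.numCoordsets())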
Example #22
File: emdfile.py Project: prody/ProDy
def parseEMDStream(stream, **kwargs):
    """ Returns an :class:`.AtomGroup` containing EMD data parsed from a stream of EMD file.

    :arg stream: Any object with the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)"""

    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)

    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))
    map = kwargs.get('map',True)
    make_nodes = kwargs.get('make_nodes',False)

    if map is False and make_nodes is False:
        LOGGER.warn('At least one of map and make_nodes should be True. '
                    'Since map was set to False intentionally, make_nodes '
                    'has been set to True.')
        make_nodes = True

    title_suffix = kwargs.get('title_suffix','')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
    atomgroup._n_atoms = n_nodes

    if make_nodes:
        LOGGER.info('Building coordinates from electron density map. This may take a while.')
        LOGGER.timeit()

        if map:
            emd, atomgroup = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                                            num_iter=num_iter, map=map, make_nodes=make_nodes)
        else:
            atomgroup = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                                       num_iter=num_iter, map=map, make_nodes=make_nodes)

        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(atomgroup.numAtoms(), atomgroup.numCoordsets()))
    else: 
        emd = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                             num_iter=num_iter, map=map, make_nodes=make_nodes)

    if make_nodes:
        if map:
            return emd, atomgroup
        else:
            return atomgroup
    else:
        return emd
Example #23
    def getNormDistFluct(self, coords):
        """Normalized distance fluctuation
        """

        model = self.getModel()
        LOGGER.info('Number of chains: {0}, chains: {1}.'
                     .format(len(list(set(coords.getChids()))), \
                                 list(set(coords.getChids()))))

        try:
            #coords = coords.select('protein and name CA')
            coords = (coords._getCoords()
                      if hasattr(coords, '_getCoords') else coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        if not isinstance(model, NMA):
            LOGGER.info('Calculating new model')
            model = GNM('prot analysis')
            model.buildKirchhoff(coords)
            model.calcModes()

        LA = importLA()
        n_atoms = model.numAtoms()
        LOGGER.timeit('_ndf')

        from .analysis import calcCrossCorr
        # <dRi, dRi>, <dRj, dRj> = 1
        crossC = 2 - 2 * calcCrossCorr(model)
        r_ij = np.zeros((n_atoms, n_atoms, 3))

        for i in range(n_atoms):
            for j in range(i + 1, n_atoms):
                r_ij[i][j] = coords[j, :] - coords[i, :]
                r_ij[j][i] = r_ij[i][j]

        r_ij_n = LA.norm(r_ij, axis=2)

        #with np.errstate(divide='ignore'):
        r_ij_n[np.diag_indices_from(r_ij_n)] = ZERO  # div by 0
        crossC = abs(crossC)
        normdistfluct = np.divide(np.sqrt(crossC), r_ij_n)
        LOGGER.report('NDF calculated in %.2lfs.', label='_ndf')
        normdistfluct[np.diag_indices_from(normdistfluct)] = 0  # div by 0
        return normdistfluct
Example #24
File: anm.py Project: creageng/ProDy
    def buildMechStiff(self, coords, n_modes=None, kbt=1.):
        """Calculate stiffness matrix calculated using :class:`.ANM` instance. 
        Method described in [EB08]_. 
    
        .. [EB08] Eyal E., Bahar I. Toward a Molecular Understanding of 
            the Anisotropic Response of Proteins to External Forces:
            Insights from Elastic Network Models. *Biophys J* **2008** 94:3424-34355. 
    
        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`.
        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
            If ``None`` is given, all modes will be calculated (3x number of atoms).
        :type n_modes: int or ``None``, default is 20.
        
        Author: Mustafa Tekpinar & Karolina Mikulska-Ruminska & Cihan Kaya
        """

        try:
            coords = (coords._getCoords()
                      if hasattr(coords, '_getCoords') else coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')
        n_atoms = natoms = self._n_atoms
        n_modes = 3 * n_atoms

        self.calcModes(n_modes=None, zeros=True)

        LOGGER.timeit('_sm')
        eigvecs = (np.transpose(self._array)).flatten()
        eigvals = np.transpose(self._eigvals)
        natoms = n_atoms

        sm = np.zeros((n_atoms, n_atoms), np.double)
        from .smtools import calcSM
        LOGGER.info('Calculating stiffness matrix.')

        calcSM(coords, sm, eigvecs, eigvals, natoms, n_modes, float(kbt))

        LOGGER.report('Stiffness matrix calculated in %.2lfs.', label='_sm')

        self._stiffness = sm

        LOGGER.info(
            'The range of effective force constant is: {0} to {1}.'.format(
                np.min(sm[np.nonzero(sm)]), np.amax(sm)))
Example #25
def evol_refine(msa, **kwargs):

    import prody
    from prody import parseMSA, refineMSA, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get('outname')
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == '.gz':
            outname, _ = splitext(msa)
        outname += '_refined' + ext

    writeMSA(outname, refineMSA(parseMSA(msa), **kwargs), **kwargs)
    LOGGER.info('Refined MSA is written in file: ' + outname)
Example #26
File: Uniprot.py Project: yaz62/rhapsody
 def savePickle(self, filename=None, folder=None, store_custom_PDBs=False):
     if folder is None:
         folder = SETTINGS.get('rhapsody_local_folder', '.')
     if filename is None:
         filename = 'UniprotMap-' + self.uniq_acc + '.pkl'
     pickle_path = os.path.join(folder, filename)
     cache = self.customPDBmappings
     if store_custom_PDBs is not True:
         # do not store alignments of custom PDBs
         self.customPDBmappings = []
     # save pickle
     pickle.dump(self, open(pickle_path, "wb"))
     self.customPDBmappings = cache
     LOGGER.info("Pickle '{}' saved.".format(filename))
     return pickle_path
Example #27
    def calcModes(self, n_modes=20, turbo=True):
        """Calculate principal (or essential) modes.  This method uses
        :func:`scipy.linalg.eigh`, or :func:`numpy.linalg.eigh`, function
        to diagonalize the covariance matrix.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate,
            default is 20, for **None** all modes will be calculated
        :type n_modes: int

        :arg turbo: when available, use a memory intensive but faster way to
            calculate modes, default is **True**
        :type turbo: bool"""

        linalg = importLA()
        if self._cov is None:
            raise ValueError('covariance matrix is not built or set')
        start = time.time()
        dof = self._dof
        if linalg.__package__.startswith('scipy'):
            if n_modes is None:
                eigvals = None
                n_modes = dof
            else:
                n_modes = int(n_modes)
                if n_modes >= self._dof:
                    eigvals = None
                    n_modes = dof
                else:
                    eigvals = (dof - n_modes, dof - 1)
            values, vectors = linalg.eigh(self._cov,
                                          turbo=turbo,
                                          eigvals=eigvals)
        else:
            if n_modes is not None:
                LOGGER.info('Scipy is not found, all modes are calculated.')
            values, vectors = linalg.eigh(self._cov)
        # Order by descending SV
        revert = list(range(len(values) - 1, -1, -1))
        values = values[revert]
        vectors = vectors[:, revert]
        which = values > 1e-8
        self._eigvals = values[which]
        self._array = vectors[:, which]
        self._vars = self._eigvals
        self._n_modes = len(self._eigvals)
        LOGGER.debug('{0} modes were calculated in {1:.2f}s.'.format(
            self._n_modes,
            time.time() - start))
Example #28
def parseEMDStream(stream, **kwargs):
    """ Returns an :class:`.AtomGroup` containing EMD data parsed from a stream of EMD file.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)"""

    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)

    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))
    map = kwargs.get('map',False)
    make_nodes = kwargs.get('make_nodes',True)

    if map is False and make_nodes is False:
        LOGGER.warn('At least one of map and make_nodes should be True. '
                    'Since make_nodes was set to False intentionally, map '
                    'has been set to True.')
        map = True

    title_suffix = kwargs.get('title_suffix','')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)

    if make_nodes:
        LOGGER.info('Building coordinates from electron density map. This may take a while.')
        LOGGER.timeit()

        if map:
            emd, atomgroup = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                                            num_iter=num_iter, map=map, make_nodes=make_nodes)
        else:
            atomgroup = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                                       num_iter=num_iter, map=map, make_nodes=make_nodes)

        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(atomgroup.numAtoms(), atomgroup.numCoordsets()))
    else: 
        emd = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes, \
                             num_iter=num_iter, map=map, make_nodes=make_nodes)

    if make_nodes:
        if map:
            return emd, atomgroup
        else:
            return atomgroup
    else:
        return emd
Example #29
File: Uniprot.py Project: yaz62/rhapsody
 def recoverPickle(self, filename=None, folder=None, days=30, **kwargs):
     acc = self.uniq_acc
     if acc is None:
         # assume acc is equal to uniq_acc
         acc = self.acc
     if folder is None:
         folder = SETTINGS.get('rhapsody_local_folder', '.')
     if filename is None:
         # assume acc is equal to uniq_acc
         acc = self.acc
         filename = 'UniprotMap-' + acc + '.pkl'
         pickle_path = os.path.join(folder, filename)
         if not os.path.isfile(pickle_path):
             # import unique accession number
             acc = queryUniprot(self.acc)['accession   0']
             filename = 'UniprotMap-' + acc + '.pkl'
             pickle_path = os.path.join(folder, filename)
     else:
         pickle_path = os.path.join(folder, filename)
     # check if pickle exists
     if not os.path.isfile(pickle_path):
         raise IOError("File '{}' not found".format(filename))
     # load pickle
     recovered_self = pickle.load(open(pickle_path, "rb"))
     if acc not in [recovered_self.acc, recovered_self.uniq_acc]:
         raise ValueError('Accession number in recovered pickle (%s) ' %
                          recovered_self.uniq_acc + 'does not match.')
     # check timestamp and ignore pickles that are too old
     date_format = "%Y-%m-%d %H:%M:%S.%f"
     t_old = datetime.datetime.strptime(recovered_self._timestamp,
                                        date_format)
     t_now = datetime.datetime.utcnow()
     Delta_t = datetime.timedelta(days=days)
     if t_old + Delta_t < t_now:
         raise RuntimeError(
             'Pickle {} was too old and was ignored.'.format(filename))
     self.fullRecord = recovered_self.fullRecord
     self.uniq_acc = recovered_self.uniq_acc
     self.sequence = recovered_self.sequence
     self.PDBrecords = recovered_self.PDBrecords
     self.PDBmappings = recovered_self.PDBmappings
     self.customPDBmappings = recovered_self.customPDBmappings
     self._align_algo_args = recovered_self._align_algo_args
     self._align_algo_kwargs = recovered_self._align_algo_kwargs
     self._timestamp = recovered_self._timestamp
     self.Pfam = recovered_self.Pfam
     LOGGER.info("Pickle '{}' recovered.".format(filename))
     return
Example #30
File: PDB.py Project: shulp2211/rhapsody
    def savePickle(self, folder=None, filename=None):
        """Stores a pickle of the current class instance. The pickle will
        contain all information and precomputed features, but not GNM and ANM
        models. In case a PDBID is missing, the parsed PDB :class:`AtomGroup`
        is stored as well.

        :arg folder: path of the folder where the pickle will be saved. If not
            specified, the local Rhapsody installation folder will be used.
        :type folder: str
        :arg filename: name of the pickle. By default, the pickle will be
            saved as ``'PDBfeatures-[PDBID].pkl'``. If a PDBID is not defined,
            the user must provide a filename.
        :type filename: str
        :return: pickle path
        :rtype: str
        """
        if folder is None:
            # define folder where to look for pickles
            folder = SETTINGS.get('rhapsody_local_folder')
            if folder is None:
                folder = '.'
            else:
                folder = os.path.join(folder, 'pickles')
        if filename is None:
            # use the default filename, if possible
            if self.PDBID is None:
                # when a custom structure is used, there is no
                # default filename: the user should provide it
                raise ValueError('Please provide a filename.')
            filename = 'PDBfeatures-' + self.PDBID + '.pkl'
        pickle_path = os.path.join(folder, filename)
        # do not store GNM and ANM instances.
        # If a valid PDBID is present, do not store parsed PDB
        # as well, since it can be easily fetched again
        cache = (self._pdb, self._gnm, self._anm)
        if self.PDBID is not None:
            self._pdb = None
        self._gnm = {}
        self._anm = {}
        for env in ['chain', 'reduced', 'sliced']:
            self._gnm[env] = {chID: None for chID in self.chids}
            self._anm[env] = {chID: None for chID in self.chids}
        # write pickle
        pickle.dump(self, open(pickle_path, "wb"))
        # restore temporarily cached data
        self._pdb, self._gnm, self._anm = cache
        LOGGER.info("Pickle '{}' saved.".format(filename))
        return pickle_path
Example #31
    def __add__(self, other):

        if not isinstance(other, AtomGroup):
            raise TypeError('unsupported operand type(s) for +: {0} and '
                            '{1}'.format(repr(type(self).__name__),
                                         repr(type(other).__name__)))

        new = AtomGroup(self._title + ' + ' + other._title)
        if self._n_csets:
            if self._n_csets == other._n_csets:
                new.setCoords(np.concatenate((self._coords, other._coords), 1))
                if self._n_csets > 1:
                    LOGGER.info('All {0} coordinate sets are copied to '
                                '{1}.'.format(self._n_csets, new.getTitle()))
            else:
                new.setCoords(np.concatenate((self._getCoords(),
                                              other._getCoords())))
                LOGGER.info('Active coordinate sets are copied to {0}.'
                            .format(new.getTitle()))
        elif other._n_csets:
            LOGGER.warn('No coordinate sets are copied to {0}'
                        .format(new.getTitle()))

        for key in set(list(self._data) + list(other._data)):
            if key in ATOMIC_FIELDS and ATOMIC_FIELDS[key].readonly:
                continue
            this = self._data.get(key)
            that = other._data.get(key)
            if this is not None or that is not None:
                if this is None:
                    shape = list(that.shape)
                    shape[0] = len(self)
                    this = np.zeros(shape, that.dtype)
                if that is None:
                    shape = list(this.shape)
                    shape[0] = len(other)
                    that = np.zeros(shape, this.dtype)
                new._data[key] = np.concatenate((this, that))

        if self._bonds is not None and other._bonds is not None:
            new.setBonds(np.concatenate([self._bonds,
                                         other._bonds + self._n_atoms]))
        elif self._bonds is not None:
            new.setBonds(self._bonds.copy())
        elif other._bonds is not None:
            new.setBonds(other._bonds + self._n_atoms)

        return new
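
Usage sketch: adding two AtomGroup instances concatenates their atoms, data arrays, and bonds (the PDB IDs are illustrative):

from prody import parsePDB

ag1 = parsePDB('1p38')
ag2 = parsePDB('1zz2')
merged = ag1 + ag2
print(merged.numAtoms() == ag1.numAtoms() + ag2.numAtoms())   # True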
Example #32
def queryUniprot(*args, n_attempts=3, dt=1, **kwargs):
    """
    Redefine prody function to check for no internet connection
    """
    attempt = 0
    while attempt < n_attempts:
        try:
            _ = openURL('http://www.uniprot.org/')
            break
        except:
            LOGGER.info(f'Attempt {attempt + 1} to contact www.uniprot.org failed')
            attempt += 1
            time.sleep((attempt + 1) * dt)
    else:
        # final attempt; let the exception propagate if it still fails
        _ = openURL('http://www.uniprot.org/')
    return pd.queryUniprot(*args, **kwargs)
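
Usage matches the original ProDy call, with the wrapper's retry parameters added (the accession number is illustrative):

record = queryUniprot('P04637', n_attempts=5, dt=2)   # retry up to 5 times
print(sorted(record)[:3])                             # peek at a few record keys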
Example #33
File: clustenm.py Project: SHZ66/ProDy
    def setAtoms(self, atoms, pH=7.0):

        '''
        Sets atoms.
        
        :arg atoms: *atoms* parsed by parsePDB

        :arg pH: pH based on which to select protonation states for adding missing hydrogens, default is 7.0.
        :type pH: float
        '''

        atoms = atoms.select('not hetatm')

        self._nuc = atoms.select('nucleotide')

        if self._nuc is not None:

            idx_p = []
            for c in self._nuc.getChids():
                tmp = self._nuc[c].iterAtoms()
                for a in tmp:
                    if a.getName() in ['P', 'OP1', 'OP2', 'OP3']:
                        idx_p.append(a.getIndex())

            if idx_p:
                nsel = 'not index ' + ' '.join([str(i) for i in idx_p])
                atoms = atoms.select(nsel)

        if self._isBuilt():
            super(ClustENM, self).setAtoms(atoms)
        else:
            LOGGER.info('Fixing the structure ...')
            LOGGER.timeit('_clustenm_fix')
            self._ph = pH
            self._fix(atoms)
            LOGGER.report('The structure was fixed in %.2fs.',
                          label='_clustenm_fix')

            if self._nuc is None:
                self._idx_cg = self._atoms.ca.getIndices()
                self._n_cg = self._atoms.ca.numAtoms()
            else:
                self._idx_cg = self._atoms.select("name CA C2 C4' P").getIndices()
                self._n_cg = self._atoms.select("name CA C2 C4' P").numAtoms()

            self._n_atoms = self._atoms.numAtoms()
            self._indices = None
Example #34
File: cath.py Project: nffaruk/ProDy
    def update(self, source=None):
        """Update data and files from CATH."""

        self._source = source = self._source or source
        self.reset()
        if source is None:
            return

        LOGGER.timeit('_cath_update')

        type_ = 0
        tree = None
        if isinstance(source, str):
            if isfile(source):
                type_ = 1
            elif isURL(source):
                type_ = 0
            else:
                type_ = 2
        elif hasattr(source, 'read'):
            type_ = 1
        else:
            raise TypeError(
                'source must be either a URL, file name, file handle, '
                'or text in XML format')

        if type_ == 0:
            LOGGER.info('Fetching data from CATH...')
            self._fetch()

            LOGGER.info('Parsing CATH files...')
            self._parse()
        elif type_ == 1:
            LOGGER.info('Reading data from the local xml file...')
            tree = ET.parse(source)
        elif type_ == 2:
            LOGGER.info('Parsing input string...')
            tree = ET.fromstring(source)

        # post-processing
        if type_ > 0:
            root = tree.getroot()
            nodes = root.iter()

            # remove the 'id.' prefix from node tags
            for node in nodes:
                if node.tag.startswith('id.'):
                    node.tag = node.tag[3:]

            # convert length attribute values from str to int
            length_nodes = root.findall('.//*[@length]')
            for node in length_nodes:
                node.attrib['length'] = int(node.attrib['length'])

            copy2(root, self.root)
            self._update_map()

        LOGGER.report('CATH local database built in %.2fs.', '_cath_update')
Example #35
def prody_select(selstr, *pdbs, **kwargs):
    """Write selected atoms from a PDB file in PDB format.

    :arg selstr: atom selection string, see :ref:`selections`

    :arg pdbs: PDB identifier(s) or filename(s)

    :arg output: output filename, default is :file:`pdb_selected.pdb`

    :arg prefix: prefix for output file, default is PDB filename

    :arg suffix: output filename suffix, default is :file:`_selected`"""

    from os.path import isfile
    from prody import LOGGER, parsePDB, writePDB

    #selstr = kwargs.get('selstr')
    if not pdbs:
        raise ValueError('pdb argument must be provided')

    if ((isfile(selstr) or len(selstr) == 4 and selstr[0].isdigit())
            and len(pdbs) == 1 and not isfile(pdbs[0])):
        pdbs, selstr = selstr, pdbs[0]
        LOGGER.warn('The order of selstr and pdb arguments has switched '
                    'to support multiple files; the old order will be '
                    'supported until v1.4.')
        pdbs = [pdbs]

    prefix = kwargs.get('prefix', None)
    suffix = kwargs.get('suffix', '_selected')
    output = kwargs.get('output', None)
    altloc = kwargs.get('altloc', None)

    for pdb in pdbs:
        pdb = parsePDB(pdb, altloc=altloc)

        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            LOGGER.warn('Selection {0} did not match any atoms.'.format(
                repr(selstr)))
            return
        LOGGER.info('Selection {0} matched {1} atoms.'.format(
            repr(selstr), len(pdbselect)))

        outname = output or ((prefix or pdb.getTitle()) + suffix)
        LOGGER.info('Selection is written into: ' +
                    writePDB(outname, pdbselect))
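
For reference, a short sketch of calling this console function directly from Python; '1ubi' is an arbitrary example identifier:

# Writes the C-alpha atoms of 1ubi into '1ubi_ca.pdb' (title + suffix).
prody_select('name CA', '1ubi', suffix='_ca')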
Example #38
    def recoverPickle(self, folder=None, filename=None, days=30, **kwargs):
        if folder is None:
            # define folder where to look for pickles
            folder = SETTINGS.get('rhapsody_local_folder', '.')
        if filename is None:
            # use the default filename, if possible
            if self.PDBID is not None:
                filename = 'PDBfeatures-' + self.PDBID + '.pkl'
            else:
                # when a custom structure is used, there is no
                # default filename: the user should provide it
                raise ValueError('Please provide a filename.')
        pickle_path = os.path.join(folder, filename)
        if not os.path.isfile(pickle_path):
            raise IOError("File '{}' not found".format(filename))
        with open(pickle_path, 'rb') as handle:
            recovered_self = pickle.load(handle)
        # check consistency of recovered data
        if self.PDBID is None:
            if self._pdb != recovered_self._pdb:
                raise ValueError(
                    'Incompatible PDB structure in recovered pickle.')
        elif self.PDBID != recovered_self.PDBID:
            raise ValueError(
                'PDBID in recovered pickle ({}) does not match.'.format(
                    recovered_self.PDBID))
        if self.n_modes != recovered_self.n_modes:
            raise ValueError(
                'Num. of modes in recovered pickle ({}) does not match.'
                .format(recovered_self.n_modes))
        # check timestamp and ignore pickles that are too old
        date_format = "%Y-%m-%d %H:%M:%S.%f"
        t_old = datetime.datetime.strptime(recovered_self.timestamp,
                                           date_format)
        t_now = datetime.datetime.utcnow()
        Delta_t = datetime.timedelta(days=days)
        if t_old + Delta_t < t_now:
            raise RuntimeError('Pickle was too old and was ignored.')
        # import recovered data
        self.chids = recovered_self.chids
        self.resids = recovered_self.resids
        self.feats = recovered_self.feats
        self._gnm = recovered_self._gnm
        self._anm = recovered_self._anm
        self.timestamp = recovered_self.timestamp
        LOGGER.info("Pickle '{}' recovered.".format(filename))
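
A hypothetical usage sketch; the PDBfeatures class name and constructor signature are assumptions inferred from the default pickle filename above:

# Hypothetical: construct the object, then try to reuse cached results.
obj = PDBfeatures('1ubi')
try:
    obj.recoverPickle(days=7)   # only accept pickles newer than a week
except (IOError, ValueError, RuntimeError):
    pass                        # no usable pickle; recompute the features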
Example #39
File: ensemble.py  Project: kaynakb/ProDy
    def __add__(self, other):
        """Concatenate ensembles. The reference coordinates, atoms, 
        and weights of *self* is used in the resulting ensemble."""

        if not isinstance(other, Ensemble):
            raise TypeError('an Ensemble instance cannot be added to an {0} '
                            'instance'.format(type(other)))
        elif self._n_atoms != other._n_atoms:
            raise ValueError('Ensembles must have same number of atoms.')

        ensemble = type(self)('{0} + {1}'.format(self.getTitle(),
                                                 other.getTitle()))
        if self._coords is not None:
            ensemble.setCoords(self._coords.copy())
        if self._confs is not None:
            ensemble.addCoordset(self._confs.copy())
        if other._confs is not None:
            ensemble.addCoordset(other._confs.copy())

        all_keys = set(list(self._data.keys()) + list(other._data.keys()))
        for key in all_keys:
            if key in self._data and key in other._data:
                self_data = self._data[key]
                other_data = other._data[key]
            elif key in self._data:
                self_data = self._data[key]
                other_data = zeros(other.numConfs(), dtype=self_data.dtype)
            elif key in other._data:
                other_data = other._data[key]
                self_data = zeros(self.numConfs(), dtype=other_data.dtype)
            ensemble._data[key] = concatenate((self_data, other_data), axis=0)

        if self._weights is not None:
            LOGGER.info('Atom weights from {0} are used in {1}.'.format(
                repr(self._title), repr(ensemble.getTitle())))
            ensemble.setWeights(self._weights.copy())
        elif other._weights is not None:
            ensemble.setWeights(other._weights.copy())

        if self._atoms is not None:
            ensemble.setAtoms(self._atoms)
            ensemble._indices = self._indices
        else:
            ensemble.setAtoms(other._atoms)
            ensemble._indices = other._indices
        return ensemble
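
A short sketch of the concatenation this operator enables; ens1 and ens2 stand for any two Ensemble instances with matching atom counts:

combined = ens1 + ens2          # invokes __add__ above
print(combined.numConfs())      # ens1.numConfs() + ens2.numConfs()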
Example #40
    def calcProjection(self, coords, blocks, **kwargs):
        natoms = self._n_atoms

        if natoms != len(blocks):
            raise ValueError('len(blocks) must match number of atoms')

        LOGGER.timeit('_rtb')
        from collections import defaultdict
        i = Increment()
        d = defaultdict(i)
        blocks = np.array([d[b] for b in blocks], dtype='int32')

        try:
            from collections import Counter
        except ImportError:
            counter = defaultdict(int)
            for b in blocks:
                counter[b] += 1
        else:
            counter = Counter(blocks)

        nblocks = len(counter)
        maxsize = 1
        nones = 0
        while counter:
            _, size = counter.popitem()
            if size == 1:
                nones += 1
            if size > maxsize:
                maxsize = size
        LOGGER.info('System has {0} blocks, the largest containing {1} of '
                    '{2} units.'.format(nblocks, maxsize, natoms))
        nb6 = nblocks * 6 - nones * 3

        coords = coords.T.astype(float, order='C')

        hessian = self._hessian
        self._project = project = np.zeros((natoms * 3, nb6), float)

        from .rtbtools import calc_projection

        calc_projection(coords, blocks, project, natoms, nblocks, nb6, maxsize)

        self._hessian = project.T.dot(hessian).dot(project)
        self._dof = self._hessian.shape[0]
        LOGGER.report('Block Hessian and projection matrix were calculated in %.2fs.', label='_rtb')
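
The code above relies on an Increment helper that is not shown. Below is a minimal sketch of what defaultdict(i) requires, namely a zero-argument callable returning consecutive integers; the actual ProDy implementation may differ:

class Increment(object):
    """Callable returning 0, 1, 2, ... on successive calls, so that
    defaultdict(Increment()) maps each new block label to the next index."""

    def __init__(self):
        self._i = -1

    def __call__(self):
        self._i += 1
        return self._i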
Example #41
def prody_select(selstr, *pdbs, **kwargs):
    """Write selected atoms from a PDB file in PDB format.
    
    :arg selstr: atom selection string, see :ref:`selections`
    
    :arg pdbs: :term:`PDB` identifier(s) or filename(s)
    
    :arg output: output filename, default is :file:`pdb_selected.pdb`

    :arg prefix: prefix for output file, default is PDB filename
    
    :arg suffix: output filename suffix, default is :file:`_selected`"""

    from os.path import isfile
    from prody import LOGGER, parsePDB, writePDB
    
    if not pdbs:
        raise ValueError('pdb argument must be provided')

    if ((isfile(selstr) or (len(selstr) == 4 and selstr[0].isdigit()))
            and len(pdbs) == 1 and not isfile(pdbs[0])):
        pdbs, selstr = selstr, pdbs[0]
        LOGGER.warn('The order of the selstr and pdb arguments has been '
                    'switched to support multiple files; the old order will '
                    'be supported until v1.4.')
        pdbs = [pdbs]

    prefix = kwargs.get('prefix', None)
    suffix = kwargs.get('suffix', '_selected')
    output = kwargs.get('output', None)
    
    for pdb in pdbs:    
        pdb = parsePDB(pdb)
            
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            LOGGER.warn('Selection {0:s} did not match any atoms.'
                        .format(repr(selstr)))
            return
        LOGGER.info('Selection {0:s} matched {1:d} atoms.'
                    .format(repr(selstr), len(pdbselect)))

        outname = output or ((prefix or pdb.getTitle()) + suffix)
        LOGGER.info('Selection is written into: ' + 
                    writePDB(outname, pdbselect))
Example #42
    def __add__(self, other):
        """Concatenate ensembles. The reference coordinates, atoms, 
        and weights of *self* is used in the resulting ensemble."""

        if not isinstance(other, Ensemble):
            raise TypeError('an Ensemble instance cannot be added to an {0} '
                            'instance'.format(type(other)))
        elif self._n_atoms != other._n_atoms:
            raise ValueError('Ensembles must have same number of atoms.')

        ensemble = Ensemble('{0} + {1}'.format(self.getTitle(),
                                               other.getTitle()))
        if self._coords is not None:
            ensemble.setCoords(self._coords.copy())
        if self._confs is not None:
            ensemble.addCoordset(self._confs.copy())
        if other._confs is not None:
            ensemble.addCoordset(other._confs.copy())

        all_keys = set(list(self._data.keys()) + list(other._data.keys()))
        for key in all_keys:
            if key in self._data and key in other._data:
                self_data = self._data[key]
                other_data = other._data[key]
            elif key in self._data:
                self_data = self._data[key]
                other_data = zeros(other.numConfs(), dtype=self_data.dtype)
            elif key in other._data:
                other_data = other._data[key]
                self_data = zeros(self.numConfs(), dtype=other_data.dtype)
            ensemble._data[key] = concatenate((self_data, other_data), axis=0)

        if self._weights is not None:
            LOGGER.info('Atom weights from {0} are used in {1}.'
                        .format(repr(self._title), repr(ensemble.getTitle())))
            ensemble.setWeights(self._weights.copy())
        elif other._weights is not None:
            ensemble.setWeights(other._weights.copy())
        
        if self._atoms is not None:
            ensemble.setAtoms(self._atoms)
            ensemble._indices = self._indices
        else:
            ensemble.setAtoms(other._atoms)
            ensemble._indices = other._indices
        return ensemble
Example #43
    def getFilterList(self):
        """Returns a list of chemicals for the entries that were filtered out"""

        filterDict = self._filterDict
        if filterDict is None:
            raise ValueError(
                'You cannot obtain the list of filtered out entries before doing any filtering.'
            )

        temp_str = ', '.join([
            str(len(filterDict['lower_MW'])),
            str(len(filterDict['upper_MW'])),
            str(len(filterDict['conf_score']))
        ])
        LOGGER.info('Filtered out [' + temp_str +
                    '] for [lower weight, upper weight, confidence score]')
        return self._filterList
Example #44
File: cath.py  Project: fongchun/ProDy
    def update(self, source=None):
        """Update data and files from CATH."""

        self._source = source = self._source or source
        self.reset()
        if source is None:
            return

        LOGGER.timeit('_cath_update')
        
        type_ = 0
        tree = None
        if isinstance(source, str):
            if isfile(source):
                type_ = 1
            elif isURL(source):
                type_ = 0
            else:
                type_ = 2
        elif hasattr(source, 'read'):
            type_ = 1
        else:
            raise TypeError('source must be either a URL, file name, '
                            'file handle, or text in XML format')

        if type_ == 0:
            LOGGER.info('Fetching data from CATH...')
            self._fetch()

            LOGGER.info('Parsing CATH files...')
            self._parse()
        elif type_ == 1:
            LOGGER.info('Reading data from the local xml file...')
            tree = ET.parse(source)
        elif type_ == 2:
            LOGGER.info('Parsing input string...')
            # ET.fromstring returns an Element; wrap it so that
            # tree.getroot() below works for this branch as well
            tree = ET.ElementTree(ET.fromstring(source))

        # post-processing
        if type_ > 0:
            root = tree.getroot()
            nodes = root.iter()

            # remove the 'id.' prefix from node tags (str.lstrip strips a
            # character set, not a prefix, so it would be a bug here)
            for node in nodes:
                if node.tag.startswith('id.'):
                    node.tag = node.tag[len('id.'):]

            # convert the length attribute from str to int
            length_nodes = root.findall('.//*[@length]')
            for node in length_nodes:
                node.attrib['length'] = int(node.attrib['length'])
            
            copy2(root, self.root)
            self._update_map()

        LOGGER.report('CATH local database built in %.2fs.', '_cath_update')
Example #45
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of 
    the weekly clustering of protein chains in the PDB generated by blastclust. 
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/
    
    This function will download about 10 Mb of data and save it after 
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can 
    be loaded using :func:`loadPDBClusters` function and be accessed 
    using :func:`listPDBCluster`."""
    
    if sqid is not None:
        if sqid not in PDB_CLUSTERS:
            raise ValueError('sqid must be one of ' + PDB_CLUSTERS_SQID_STR)
        keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)
    
    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(keys),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue
        else:
            out = openFile(filename+'.gz', 'w', folder=PDB_CLUSTERS_PATH) 
            out.write(inp.read())
            inp.close()
            out.close()
            count += 1
        LOGGER.update(i, label='_prody_fetchPDBClusters')
    LOGGER.clear()
    if len(keys) == count:
        LOGGER.info('All PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
Example #46
File: ensemble.py  Project: kaynakb/ProDy
    def iterpose(self, rmsd=0.0001, quiet=False):
        """Iteratively superpose the ensemble until convergence.  Initially,
        all conformations are aligned with the reference coordinates.  Then
        mean coordinates are calculated, and are set as the new reference
        coordinates.  This is repeated until reference coordinates do not
        change.  This is determined by the value of RMSD between the new and
        old reference coordinates.  Note that at the end of the iterative
        procedure the reference coordinate set will be the average of the
        conformations in the ensemble.

        :arg rmsd: change in reference coordinates to determine convergence,
            default is 0.0001 Å RMSD
        :type rmsd: float"""

        if self._coords is None:
            raise AttributeError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise AttributeError('conformations are not set, use '
                                 '`addCoordset`')
        LOGGER.info('Starting iterative superposition:')
        LOGGER.timeit('_prody_ensemble')
        rmsdif = 1
        step = 0
        weights = self._weights
        length = len(self)
        if weights is not None:
            if weights.ndim == 3:
                weightsum = weights.sum(axis=0)
                weightsum[weightsum == 0.] = 1.  # add pseudocount to avoid nan
            else:
                weightsum = length

        while rmsdif > rmsd:
            self._superpose(quiet=quiet)
            if weights is None:
                newxyz = self._confs.sum(0) / length
            else:
                newxyz = (self._confs * weights).sum(0) / weightsum
            rmsdif = getRMSD(self._coords, newxyz)
            self._coords = newxyz
            step += 1
            LOGGER.info('Step #{0}: RMSD difference = {1:.4e}'.format(
                step, rmsdif))
        LOGGER.report('Iterative superposition completed in %.2fs.',
                      '_prody_ensemble')
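
Typical use, assuming ens is an Ensemble with coordinates and conformations already set:

ens.iterpose(rmsd=1e-4)     # iterate until the mean structure stabilizes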
Example #47
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of
    the weekly clustering of protein chains in the PDB generated by blastclust.
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/

    This function will download about 10 Mb of data and save it after
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can
    be loaded using :func:`loadPDBClusters` function and be accessed
    using :func:`listPDBCluster`."""

    if sqid is not None:
        if sqid not in PDB_CLUSTERS:
            raise ValueError('sqid must be one of ' + PDB_CLUSTERS_SQID_STR)
        keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(keys),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue
        else:
            out = openFile(filename + '.gz', 'w', folder=PDB_CLUSTERS_PATH)
            out.write(inp.read())
            inp.close()
            out.close()
            count += 1
        LOGGER.update(i, label='_prody_fetchPDBClusters')
    LOGGER.finish()
    if len(keys) == count:
        LOGGER.info('All PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
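
A usage sketch based on the functions named in the docstring; the exact signature of listPDBCluster is an assumption:

fetchPDBClusters(sqid=90)                     # download only the 90% clusters
loadPDBClusters()
print(listPDBCluster('1ubi', 'A', sqid=90))   # signature assumed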
Example #48
File: dali.py  Project: minghao2016/ProDy
    def getFilterList(self):
        """Returns a list of PDB IDs and chains for the entries that were filtered out"""

        filterDict = self._filterDict
        if filterDict is None:
            raise ValueError(
                'You cannot obtain the list of filtered out entries before doing any filtering.'
            )

        temp_str = ', '.join([
            str(len(filterDict['len'])),
            str(len(filterDict['rmsd'])),
            str(len(filterDict['Z'])),
            str(len(filterDict['identity']))
        ])
        LOGGER.info('Filtered out [' + temp_str +
                    '] for [length, RMSD, Z, identity]')
        return self._filterList
Example #49
def evol_filter(msa, *word, **kwargs):

    from prody import MSAFile, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get('outname')
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == '.gz':
            # strip the inner extension too, keeping the full suffix
            outname, inner = splitext(outname)
            ext = inner + ext
        outname += '_filtered' + ext

    single = len(word) == 1
    if single:
        word = word[0]

    filter = None
    if kwargs.get('startswith', False):
        if single:
            filter = lambda label, seq, word=word: label.startswith(word)

    elif kwargs.get('endswith', False):
        if single:
            filter = lambda label, seq, word=word: label.endswith(word)

    elif kwargs.get('contains', False):
        if single:
            filter = lambda label, seq, word=word: word in label

    elif kwargs.get('equals', False):
        if single:
            filter = lambda label, seq, word=word: word == label
        else:
            filter = lambda label, seq, word=set(word): label in word
    else:
        raise TypeError('one of startswith, endswith, contains, or equals '
                        'must be specified')

    if filter is None:
        raise ValueError('startswith, endswith, and contains accept '
                         'a single word only')

    msa = MSAFile(msa,
                  filter=filter,
                  filter_full=kwargs.get('filter_full', False))

    LOGGER.info('Filtered MSA is written in file: ' +
                writeMSA(outname, msa, **kwargs))
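
A usage sketch; 'msa.fasta' stands for any MSA file readable by MSAFile:

# Keep only sequences whose labels contain 'HUMAN'; the output name is
# derived from the input name ('msa_filtered.fasta') unless outname is given.
evol_filter('msa.fasta', 'HUMAN', contains=True)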
Example #50
def evol_filter(msa, *word, **kwargs):

    from prody import MSAFile, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get('outname')
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == '.gz':
            # strip the inner extension too, keeping the full suffix
            outname, inner = splitext(outname)
            ext = inner + ext
        outname += '_filtered' + ext

    single = len(word) == 1
    if single:
        word = word[0]

    filter = None
    if kwargs.get('startswith', False):
        if single:
            filter = lambda label, seq, word=word: label.startswith(word)

    elif kwargs.get('endswith', False):
        if single:
            filter = lambda label, seq, word=word: label.endswith(word)

    elif kwargs.get('contains', False):
        if single:
            filter = lambda label, seq, word=word: word in label

    elif kwargs.get('equals', False):
        if single:
            filter = lambda label, seq, word=word: word == label
        else:
            filter = lambda label, seq, word=set(word): label in word
    else:
        raise TypeError('one of startswith, endswith, contains, or equals '
                        'must be specified')

    if filter is None:
        raise ValueError('startswith, endswith, and contains accept '
                         'a single word only')

    msa = MSAFile(msa, filter=filter,
                  filter_full=kwargs.get('filter_full', False))

    LOGGER.info('Filtered MSA is written in file: ' +
                writeMSA(outname, msa, **kwargs))
Example #51
def searchDali(pdbId, chainId, daliURL=None, subset='fullPDB', **kwargs):
    """Search Dali server with input of PDB ID and chain ID.
    Dali server: http://ekhidna2.biocenter.helsinki.fi/dali/
    
    :arg subset: fullPDB, PDB25, PDB50, PDB90
    :type subset: str
    
    """

    # urllib2 exists only on Python 2; fall back to urllib.request on Python 3
    try:
        import urllib2
        from urllib import urlencode
    except ImportError:
        import urllib.request as urllib2
        from urllib.parse import urlencode

    LOGGER.timeit('_dali')
    timeout = kwargs.pop('timeout', 120)

    if daliURL is None:
        daliURL = "http://ekhidna2.biocenter.helsinki.fi/cgi-bin/sans/dump.cgi"
    pdbId = pdbId.lower()
    pdb_chain = pdbId + chainId
    parameters = {'cd1': pdb_chain, 'method': 'search',
                  'title': 'Title_' + pdb_chain, 'address': ''}
    enc_params = urlencode(parameters).encode('utf-8')
    request = urllib2.Request(daliURL, enc_params)
    try_error = 3
    while try_error >= 0:
        try:
            url = urllib2.urlopen(request).url
            break
        except Exception:
            try_error -= 1
            if try_error >= 0:
                LOGGER.sleep(2, '. Connection error happened. Trying to reconnect...')
                continue
            else:
                url = urllib2.urlopen(request).url
                break
    if url.split('.')[-1].lower() in ['html', 'php']:
        url = url.replace(url.split('/')[-1], '')
    LOGGER.debug('Submitted Dali search for PDB and chain "{0} and {1}".'.format(pdbId, chainId))
    LOGGER.info(url)
    LOGGER.clear()
    obj = DaliRecord(url, pdbId, chainId, subset=subset, timeout=timeout, **kwargs)
    if obj.isSuccess:
        return obj
    
    return None
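
A usage sketch; '1ubi' is an arbitrary example identifier:

dali_rec = searchDali('1ubi', 'A', subset='PDB25')
if dali_rec is not None:
    print(dali_rec.isSuccess)    # True when the search completed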
Example #52
def parsePolyPhen2output(pph2_output):
    '''Import PolyPhen-2 results directly from the output of
    'queryPolyPhen2' or from a file (in 'full' format).
    '''
    assert isinstance(pph2_output, (dict, str))
    if isinstance(pph2_output, dict):
        lines = pph2_output['full'].split('\n')
    else:
        with open(pph2_output, 'r') as file:
            lines = file.readlines()
    # discard invalid lines
    lines = [l for l in lines if l.strip() and l[0] != '#']
    if not lines:
        msg = (
            "PolyPhen-2's output is empty. Please check file 'pph2-log.txt' "
            "in the output folder for error messages from PolyPhen-2. \n"
            "Typical errors include: \n"
            "1) query contains *non-human* variants \n"
            "2) variants' format is incorrect (e.g. "
            '"UniprotID pos wt_aa mut_aa") \n'
            "3) wild-type amino acids are in the wrong position on the "
            "sequence (please refer to Uniprot's canonical isoform) \n"
            "4) Uniprot accession number is not recognized by PolyPhen-2. \n")
        raise RuntimeError(msg)
    # define a structured array
    pl_dtype = np.dtype([(col, 'U25') for col in pph2_columns])
    parsed_lines = np.zeros(len(lines), dtype=pl_dtype)
    # fill structured array
    n_cols = len(pph2_columns)
    for i, line in enumerate(lines):
        # parse line
        words = [w.strip() for w in line.split('\t')]
        # check format
        n_words = len(words)
        if n_words == n_cols - 1:
            # manually insert null 'other' column
            words.append('?')
        elif n_words != n_cols:
            msg = 'Incorrect number of columns: {}'.format(n_words)
            raise ValueError(msg)
        # import to structured array
        parsed_lines[i] = tuple(words)
    LOGGER.info("PolyPhen-2's output parsed.")
    return parsed_lines
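
A usage sketch; 'pph2-full.txt' stands for any PolyPhen-2 output file in 'full' format:

records = parsePolyPhen2output('pph2-full.txt')
print(records.dtype.names)    # column names defined by pph2_columns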
Example #53
File: emsurfer.py  Project: uibcdf/ProDy
    def fetch(self, url=None, localFile=False, **kwargs):
        if localFile:
            with open(url, 'r') as emsurfer_file:
                data = emsurfer_file.read()
        else:
            import requests
            
            if url is None:
                url = self._url

            html = requests.get(url).content

            if PY3K:
                html = html.decode()

            LOGGER.clear()
            LOGGER.report('Emsurfer results were fetched in %.1fs.', '_emsurfer')
            data = html.strip().split('\n')
        
        data_list = []
        for line in data[3:-2]:
            data_list.append(tuple(line.split('\t')))

        # Rank	EMDB_ID	EUC_D	RESOLUTION
        emsurferInfo = np.array(data_list, dtype=[('Rank', '<i4'), ('EMDB_ID', '<U70'),
                                                  ('EUC_D', '<f4'), ('RESOLUTION', '<f4')])
        emdListAll = []
        self._emsurferInfo = emsurferInfo
        emsurfer_temp_dict = dict()
        for temp in self._emsurferInfo:
            temp_dict = dict()
            temp_dict['Rank'] = temp[0]
            temp_dict['EMDB_ID'] = emdbId = temp[1]
            temp_dict['EUC_D'] = temp[2]
            temp_dict['RESOLUTION'] = temp[3]
            emsurfer_temp_dict[emdbId] = temp_dict
            emdListAll.append(emdbId)
        self._emdListAll = tuple(emdListAll)
        self._emdList = self._emdListAll
        self._alignEMD = emsurfer_temp_dict
        LOGGER.info('Obtained {0} EMD matches from Emsurfer for {1}.'
                    .format(len(emdListAll), self._emdId))
        return True
Example #54
def print_feat_imp_figure(filename, feat_imp, featset):
    assert isinstance(filename, str), 'filename must be a string'
    filename = os.path.splitext(filename)[0] + '.png'

    matplotlib = _try_import_matplotlib()
    if matplotlib is None:
        return
    else:
        from matplotlib import pyplot as plt

    fig = plt.figure(figsize=(7, 7))
    n = len(feat_imp)
    plt.bar(range(n), feat_imp, align='center', tick_label=featset)
    plt.xticks(rotation='vertical')
    plt.ylabel('feat. importance')
    fig.savefig(filename, format='png', bbox_inches='tight')
    plt.close()
    plt.rcParams.update(plt.rcParamsDefault)
    LOGGER.info(f'Feat. importance plot saved to {filename}')
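
A usage sketch with toy data; the feature names below are made up for illustration:

feat_imp = [0.5, 0.3, 0.2]
featset = ['GNM_MSF', 'ANM_stiffness', 'SASA']
print_feat_imp_figure('feat_imp.png', feat_imp, featset)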
Example #55
    def close(self):
        """Close the file.  This method will not affect a stream."""

        if self._filename is None:
            self._closed = True
            return

        if not self._mode.startswith('r') and self._format == STOCKHOLM:
            try:
                self._write('//\n')
            except ValueError:
                LOGGER.info('Failed to write terminal slash characters to '
                            'closed file.')

        try:
            self._stream.close()
        except Exception:
            pass
        self._closed = True
Example #56
File: pdbfile.py  Project: creageng/ProDy
def parseChainsList(filename):
    """
    Parse a set of PDBs and extract chains based on a list in a text file.

    :arg filename: the name of the file to be read
    :type filename: str

    Returns: lists containing an :class:`.AtomGroup` for each PDB,
    the headers for those PDBs, and the requested :class:`.Chain` objects
    """
    verb = LOGGER.verbosity
    LOGGER.verbosity = 'info'

    with open(filename, 'r') as fi:
        lines = fi.readlines()

    pdb_ids = []
    ags = []
    headers = []
    chains = []
    num_lines = len(lines)
    LOGGER.progress('Starting', num_lines)
    for i, line in enumerate(lines):
        LOGGER.update(i, 'Parsing lines...')
        pdb_id = line.split()[0].split('_')[0]
        if pdb_id not in pdb_ids:
            pdb_ids.append(pdb_id)

            ag, header = parsePDB(pdb_id, compressed=False,
                                  subset=line.split()[0].split('_')[1],
                                  header=True)

            ags.append(ag)
            headers.append(header)
        else:
            # reuse the structure parsed earlier for this PDB ID; otherwise
            # the chain would be taken from whichever PDB was parsed last
            ag = ags[pdb_ids.index(pdb_id)]

        chains.append(ag.getHierView()[line.strip().split()[1]])

    LOGGER.verbosity = verb
    LOGGER.info('{0} PDBs have been parsed and {1} chains have been '
                'extracted.'.format(len(ags), len(chains)))

    return ags, headers, chains
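
From the parsing logic above, each input line carries a PDB identifier joined to a subset by an underscore, then a chain identifier. A hypothetical chain-list file might look like:

1ubi_ca A
2k39_ca A
2k39_ca B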
Example #57
File: cath.py  Project: nffaruk/ProDy
    def save(self, filename='cath.xml'):
        """Write local CATH database to an XML file. *filename* can either be a 
        file name or a handle."""

        LOGGER.timeit('_cath_write')

        if not isinstance(filename, str):
            try:
                fn = filename.name
            except AttributeError:
                fn = repr(filename)
            f = filename
        else:
            fn = filename

        LOGGER.info('Writing data to {0}...'.format(fn))

        if not len(self.root):
            raise ValueError('local database has not been built, '
                             'please call update() first')

        tree = self.copy()
        root = tree.getroot()

        # convert int to str
        length_nodes = root.findall('.//*[@length]')
        for node in length_nodes:
            node.attrib['length'] = str(node.attrib['length'])

        # add prefix to node tags
        nodes = root.iter()
        for node in nodes:
            node.tag = 'id.' + node.tag

        # add indentation to nodes
        indentElement(root)

        opened = isinstance(filename, str)
        if opened:
            f = open(filename, 'wb')
        tree.write(f, encoding='utf-8')
        if opened:
            # only close handles opened here; a user-supplied handle is
            # left open for the caller to manage
            f.close()

        LOGGER.report('CATH local database saved in %.2fs.', '_cath_write')
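
Continuing the earlier sketch, the database can be persisted after update(); cath stands for the assumed CATHDB instance:

cath.update()
cath.save('cath.xml')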
Example #58
    def __add__(self, other):
        """Concatenate ensembles. The reference coordinates and weights of
        *self* is used in the resulting ensemble."""

        if not isinstance(other, Ensemble):
            raise TypeError('an Ensemble instance cannot be added to an {0} '
                            'instance'.format(type(other)))
        elif self.numAtoms() != other.numAtoms():
            raise ValueError('Ensembles must have same number of atoms.')

        ensemble = Ensemble('{0} + {1}'.format(self.getTitle(),
                                               other.getTitle()))
        ensemble.setCoords(self._coords.copy())
        ensemble.addCoordset(self._confs.copy())
        ensemble.addCoordset(other.getCoordsets())
        if self._weights is not None:
            LOGGER.info('Atom weights from {0} are used in {1}.'.format(
                repr(self._title), repr(ensemble.getTitle())))
            ensemble.setWeights(self._weights)
        return ensemble
Example #59
File: Uniprot.py  Project: yaz62/rhapsody
    def _sliceMSA(self, msa):
        acc_name = self.fullRecord['name   0']
        # find sequences in MSA related to the given Uniprot name
        indexes = msa.getIndex(acc_name)
        if indexes is None:
            raise RuntimeError(
                'No sequence found in MSA for {}'.format(acc_name))
        elif not isinstance(indexes, list):
            indexes = [indexes]
        # slice MSA to include only columns from selected sequences
        cols = np.array([], dtype=int)
        arr = msa._getArray()
        for i in indexes:
            cols = np.append(cols, np.char.isalpha(arr[i]).nonzero()[0])
        cols = np.unique(cols)
        arr = arr.take(cols, 1)
        sliced_msa = MSA(arr, title='refined', labels=msa._labels)
        LOGGER.info('Number of columns in MSA reduced to {}.'
                    .format(sliced_msa.numResidues()))
        return sliced_msa, indexes