Exemplo n.º 1
0
def prody_anm(pdb, **kwargs):
    """Perform ANM calculations for *pdb*.

    *pdb* is passed to :func:`prody.parsePDB` together with the ``model``
    option.  Options missing from *kwargs* are filled in from the
    module-level ``DEFAULTS`` mapping before they are read.

    All files are written into the ``outdir`` directory and their names
    start with ``prefix``; when ``prefix`` equals ``'_anm'`` the title of the
    parsed structure is prepended.  An NMD file is always written.
    ``outnpz`` additionally saves the model and ``extend`` writes an
    extended NMD file.  The other numerical files are enabled by ``outall``
    or individually by ``outeig``, ``outbeta``, ``outcov``, ``outcc``,
    ``outhm``, ``hessian``, ``kirchhoff`` and ``outsf``.  Figures are enabled
    by ``figall`` or individually by ``figcc``, ``figcmap``, ``figsf`` and
    ``figbeta``; they are skipped with a warning when Matplotlib cannot be
    imported.

    Returns **None**.  When the ``select`` string matches no atoms a warning
    is logged and the function returns without writing anything.

    :raises IOError: when ``outdir`` is not an existing directory
    """

    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import numpy as np
    import prody
    LOGGER = prody.LOGGER

    selstr = kwargs.get('select')
    prefix = kwargs.get('prefix')
    cutoff = kwargs.get('cutoff')
    gamma = kwargs.get('gamma')
    nmodes = kwargs.get('nmodes')
    model = kwargs.get('model')

    pdb = prody.parsePDB(pdb, model=model)
    if prefix == '_anm':
        prefix = pdb.getTitle() + '_anm'

    select = pdb.select(selstr)
    if select is None:
        LOGGER.warning('Selection {0} did not match any atoms.'
                       .format(repr(selstr)))
        return
    LOGGER.info('{0} atoms will be used for ANM calculations.'
                .format(len(select)))

    anm = prody.ANM(pdb.getTitle())
    anm.buildHessian(select, cutoff, gamma)
    anm.calcModes(nmodes)
    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(anm, join(outdir, prefix))
    prody.writeNMD(join(outdir, prefix + '.nmd'), anm, select)

    extend = kwargs.get('extend')
    if extend:
        if extend == 'all':
            extended = prody.extendModel(anm, select, pdb)
        else:
            extended = prody.extendModel(anm, select, select | pdb.bb)
        prody.writeNMD(join(outdir, prefix + '_extended_' +
                       extend + '.nmd'), *extended)

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    numext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    def write_array(tag, array):
        # Save *array* as <outdir>/<prefix><tag><numext>.
        prody.writeArray(join(outdir, prefix + tag + numext), array,
                         delimiter=delim, format=numformat)

    if outall or kwargs.get('outeig'):
        write_array('_evectors', anm.getArray())
        write_array('_evalues', anm.getEigvals())

    if outall or kwargs.get('outbeta'):
        from prody.utilities import openFile
        fout = openFile(prefix + '_beta.txt', 'w', folder=outdir)
        # Close the handle even when formatting or writing a row fails.
        try:
            fout.write('{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n'
                       .format(['C', 'RES', '####', 'Exp.', 'The.']))
            for data in zip(select.getChids(), select.getResnames(),
                            select.getResnums(), select.getBetas(),
                            prody.calcTempFactors(anm, select)):
                fout.write('{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} '
                           '{0[4]:5.2f}\n'.format(data))
        finally:
            fout.close()

    if outall or kwargs.get('outcov'):
        write_array('_covariance', anm.getCovariance())

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # The cross-correlation matrix is shared by the text and heatmap
        # outputs, so it is computed once.
        crosscorr = prody.calcCrossCorr(anm)
        if outall or kwargs.get('outcc'):
            write_array('_cross-correlations', crosscorr)
        if outall or kwargs.get('outhm'):
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               crosscorr, resnum=select.getResnums(),
                               xlabel='Residue', ylabel='Residue',
                               title=anm.getTitle() + ' cross-correlations')

    if outall or kwargs.get('hessian'):
        write_array('_hessian', anm.getHessian())

    if outall or kwargs.get('kirchhoff'):
        write_array('_kirchhoff', anm.getKirchhoff())

    if outall or kwargs.get('outsf'):
        write_array('_sqflucts', prody.calcSqFlucts(anm))

    figall = kwargs.get('figall')
    figcc = kwargs.get('figcc')
    figsf = kwargs.get('figsf')
    figbeta = kwargs.get('figbeta')
    figcmap = kwargs.get('figcmap')

    if figall or figcc or figsf or figbeta or figcmap:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def save_figure(tag):
                # Write the current figure as <prefix><tag>.<figformat> and
                # close every open figure afterwards.
                plt.savefig(join(outdir, prefix + tag + '.' + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')

            if figall or figcc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(anm)
                save_figure('_cc')

            if figall or figcmap:
                plt.figure(figsize=(width, height))
                prody.showContactMap(anm)
                save_figure('_cm')

            if figall or figsf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(anm)
                save_figure('_sf')

            if figall or figbeta:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(anm, select)
                plt.plot(bexp, label='Experimental')
                # The legend carries the correlation between the calculated
                # and experimental values.
                plt.plot(bcal, label=('Theoretical (R={0:.2f})'
                                      .format(np.corrcoef(bcal, bexp)[0, 1])))
                plt.legend(prop={'size': 10})
                plt.xlabel('Node index')
                plt.ylabel('Experimental B-factors')
                plt.title(pdb.getTitle() + ' B-factors')
                save_figure('_bf')
Exemplo n.º 2
0
def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping before they are read.  The file type is taken from
    the extension of *coords* (a trailing ``.gz`` is ignored):

    * ``.dcd``: the trajectory is opened with :class:`prody.DCDFile`.  An
      optional structure given by ``psf`` or ``pdb`` supplies the atoms that
      ``select`` is applied to.  Trajectories with more than 1000 frames
      are handled with ``buildCovariance`` followed by ``calcModes(nmodes)``;
      shorter ones are loaded, superposed unless ``aligned`` is set, and
      decomposed with ``performSVD``.
    * anything else: the file is parsed with :func:`prody.parsePDB`, an
      ensemble is built from the ``select`` atoms, superposed unless
      ``aligned`` is set, and decomposed with ``performSVD``.

    All files are written into ``outdir`` with names starting with
    ``prefix``; when ``prefix`` is ``'_pca'`` or ``'_eda'`` the title of the
    input is prepended.  An NMD file with the first ``nmodes`` modes is
    always written.  ``outnpz`` and ``extend`` add a saved model and an
    extended NMD file.  The other numerical files are enabled by ``outall``
    or by ``outeig``, ``outcov``, ``outcc``, ``outhm``, ``outsf`` and
    ``outproj``; figures by ``figall`` or by ``figcc``, ``figsf`` and
    ``figproj``.

    ``figproj`` is a whitespace separated list of mode specifications using
    1-based mode numbers: a single number (``"1"``), a comma separated list
    (``"1,2"``) or an inclusive range (``"1-3"``).  Entries that cannot be
    parsed are skipped with a warning.

    :raises IOError: when ``outdir`` is not an existing directory
    :raises ValueError: when the input has fewer than two frames or models,
        when ``select`` matches no atoms, or when the number of selected
        atoms does not match the DCD file
    """

    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')

    filetype = splitext(coords)[1].lower()
    if filetype == '.gz':
        filetype = splitext(coords[:-3])[1].lower()

    if filetype == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                pdb = prody.parsePDB(pdb)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            # An empty selection would otherwise fail later with an
            # unrelated TypeError or AttributeError.
            if select is None:
                raise ValueError('selection {0} do not match any atoms'
                                 .format(repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'
                            .format(len(select)))
            else:
                # The structure and the trajectory differ in size, so the
                # selection itself has to match the trajectory.
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(select.numAtoms(),
                                                         dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())

        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())
        if len(dcd) > 1000:
            pca.buildCovariance(dcd, aligned=kwargs.get('aligned'))
            pca.calcModes(nmodes)
            ensemble = dcd
        else:
            ensemble = dcd[:]
            if not kwargs.get('aligned'):
                ensemble.iterpose()
            pca.performSVD(ensemble)

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # Check for an empty selection before len(select) is evaluated for
        # the log messages below.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'
                             .format(repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'
                    .format(len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'
                    .format(len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not kwargs.get('aligned'):
            ensemble.iterpose()
        pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(join(outdir, prefix + '_extended_' +
                           extend + '.nmd'), *extended)
        else:
            LOGGER.warning('Model could not be extended, provide a PDB or '
                           'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    numext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    def write_array(tag, array):
        # Save *array* as <outdir>/<prefix><tag><numext>.
        prody.writeArray(join(outdir, prefix + tag + numext), array,
                         delimiter=delim, format=numformat)

    if outall or kwargs.get('outeig'):
        write_array('_evectors', pca.getArray())
        write_array('_evalues', pca.getEigvals())
    if outall or kwargs.get('outcov'):
        write_array('_covariance', pca.getCovariance())
    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # Computed once and shared by the text and heatmap outputs.
        crosscorr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            write_array('_cross-correlations', crosscorr)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            # Residue numbers are unavailable when no structure was given.
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               crosscorr, xlabel='Residue', ylabel='Residue',
                               title=pca.getTitle() + ' cross-correlations',
                               **hmargs)

    if outall or kwargs.get('outsf'):
        write_array('_sqfluct', prody.calcSqFlucts(pca))
    if outall or kwargs.get('outproj'):
        write_array('_proj', prody.calcProjection(ensemble, pca))

    figall = kwargs.get('figall')
    figcc = kwargs.get('figcc')
    figsf = kwargs.get('figsf')
    figproj = kwargs.get('figproj')

    if figall or figcc or figsf or figproj:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def save_figure(tag):
                # Write the current figure as <prefix><tag>.<figformat> and
                # close every open figure afterwards.
                plt.savefig(join(outdir, prefix + tag + '.' + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')

            if figall or figcc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                save_figure('_cc')
            if figall or figsf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                save_figure('_sf')
            if figall or figproj:
                indices = []
                # figproj may be unset when only figall was requested; in
                # that case there are no projections to draw.
                for item in (figproj or '').split():
                    try:
                        if '-' in item:
                            bounds = item.split('-')
                            if len(bounds) == 2:
                                # "a-b" is an inclusive, 1-based range.
                                indices.append(list(range(int(bounds[0]) - 1,
                                                          int(bounds[1]))))
                        elif ',' in item:
                            indices.append([int(i) - 1
                                            for i in item.split(',')])
                        else:
                            indices.append(int(item) - 1)
                    except ValueError:
                        # Best effort: a malformed entry must not abort the
                        # remaining figures, but it should not go unnoticed.
                        LOGGER.warning('Projection specification {0} could '
                                       'not be parsed and is skipped.'
                                       .format(repr(item)))
                for index in indices:
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    if isinstance(index, int):
                        index = [index]
                    # File names carry the 1-based mode numbers.
                    save_figure('_proj_' + '_'.join(str(i + 1)
                                                    for i in index))
Exemplo n.º 3
0
def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping before they are read.  The file type is taken from
    the extension of *coords* (a trailing ``.gz`` is ignored):

    * ``.dcd``: the trajectory is opened with :class:`prody.DCDFile`.  An
      optional structure given by ``psf`` or ``pdb`` supplies the atoms that
      ``select`` is applied to; ``altloc``, when not **None**, is forwarded
      to :func:`prody.parsePDB` for that structure.  Trajectories with more
      than 1000 frames are handled with ``buildCovariance`` followed by
      ``calcModes(nmodes)``; shorter ones are loaded, superposed unless
      ``aligned`` is set, and decomposed with ``performSVD``.  Afterwards
      ``nmodes`` is replaced by the number of modes that were calculated.
    * anything else: the file is parsed with :func:`prody.parsePDB`, an
      ensemble is built from the ``select`` atoms, superposed unless
      ``aligned`` is set, and decomposed with ``performSVD``.

    When ``nproc`` is set, the decomposition runs inside
    ``threadpoolctl.threadpool_limits(limits=nproc, user_api="blas")``.
    ``quiet`` is passed on to the DCD covariance and superposition steps.

    All files are written into ``outdir`` with names starting with
    ``prefix``; when ``prefix`` is ``'_pca'`` or ``'_eda'`` the title of the
    input is prepended.  An NMD file with the first ``nmodes`` modes is
    always written.  ``outnpz``, ``outscipion`` and ``extend`` add a saved
    model, Scipion modes and an extended NMD file.  The other numerical
    files are enabled by ``outall`` or by ``outeig``, ``outcov``, ``outcc``,
    ``outhm``, ``outsf`` and ``outproj``; figures by ``figall`` or by
    ``figcc``, ``figsf`` and ``figproj``.

    ``figproj`` is a whitespace separated list of mode specifications using
    1-based mode numbers: a single number (``"1"``), a comma separated list
    (``"1,2"``) or an inclusive range (``"1-3"``).  Entries that cannot be
    parsed are skipped with a warning.

    :raises IOError: when ``outdir`` is not an existing directory
    :raises ImportError: when ``nproc`` is set and threadpoolctl is missing
    :raises ValueError: when the input has fewer than two frames or models,
        when ``select`` matches no atoms, or when the number of selected
        atoms does not match the DCD file
    """

    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')
    aligned = kwargs.get('aligned')
    nproc = kwargs.get('nproc')

    def run_with_thread_limit(task):
        # Call task() and return its result, capping the number of BLAS
        # threads at nproc when that option is set.
        if not nproc:
            return task()
        try:
            from threadpoolctl import threadpool_limits
        except ImportError:
            raise ImportError(
                'Please install threadpoolctl to control threads')
        with threadpool_limits(limits=nproc, user_api="blas"):
            return task()

    filetype = splitext(coords)[1].lower()
    if filetype == '.gz':
        filetype = splitext(coords[:-3])[1].lower()

    if filetype == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                # Forward the alternate location choice only when one was
                # given, so that parsePDB otherwise applies its own default.
                pdbargs = {} if altloc is None else {'altloc': altloc}
                pdb = prody.parsePDB(pdb, **pdbargs)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            # An empty selection would otherwise fail later with an
            # unrelated TypeError or AttributeError.
            if select is None:
                raise ValueError('selection {0} do not match any atoms'.format(
                    repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
            else:
                # The structure and the trajectory differ in size, so the
                # selection itself has to match the trajectory.
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(
                                         select.numAtoms(), dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())

        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())

        def decompose_trajectory():
            # Calculate the modes and return the object holding the frames
            # they were calculated from.  nmodes still holds the requested
            # number of modes when this runs.
            if len(dcd) > 1000:
                pca.buildCovariance(dcd, aligned=aligned, quiet=quiet)
                pca.calcModes(nmodes)
                return dcd
            frames = dcd[:]
            if not aligned:
                frames.iterpose(quiet=quiet)
            pca.performSVD(frames)
            return frames

        ensemble = run_with_thread_limit(decompose_trajectory)
        nmodes = pca.numModes()

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # Check for an empty selection before len(select) is evaluated for
        # the log messages below.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'.format(
                repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'.format(
            len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
            len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not aligned:
            ensemble.iterpose()

        run_with_thread_limit(lambda: pca.performSVD(ensemble))

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            LOGGER.warning('Model could not be extended, provide a PDB or '
                           'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    numext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    def write_array(tag, array):
        # Save *array* as <outdir>/<prefix><tag><numext>.
        prody.writeArray(join(outdir, prefix + tag + numext),
                         array,
                         delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outeig'):
        write_array('_evectors', pca.getArray())
        write_array('_evalues', pca.getEigvals())
    if outall or kwargs.get('outcov'):
        write_array('_covariance', pca.getCovariance())
    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # Computed once and shared by the text and heatmap outputs.
        crosscorr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            write_array('_cross-correlations', crosscorr)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            # Residue numbers are unavailable when no structure was given.
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               crosscorr,
                               xlabel='Residue',
                               ylabel='Residue',
                               title=pca.getTitle() + ' cross-correlations',
                               **hmargs)

    if outall or kwargs.get('outsf'):
        write_array('_sqfluct', prody.calcSqFlucts(pca))
    if outall or kwargs.get('outproj'):
        write_array('_proj', prody.calcProjection(ensemble, pca))

    figall = kwargs.get('figall')
    figcc = kwargs.get('figcc')
    figsf = kwargs.get('figsf')
    figproj = kwargs.get('figproj')

    if figall or figcc or figsf or figproj:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def save_figure(tag):
                # Write the current figure as <prefix><tag>.<figformat> and
                # close every open figure afterwards.
                plt.savefig(join(outdir, prefix + tag + '.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')

            if figall or figcc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                save_figure('_cc')
            if figall or figsf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                save_figure('_sf')
            if figall or figproj:
                indices = []
                # figproj may be unset when only figall was requested; in
                # that case there are no projections to draw.
                for item in (figproj or '').split():
                    try:
                        if '-' in item:
                            bounds = item.split('-')
                            if len(bounds) == 2:
                                # "a-b" is an inclusive, 1-based range.
                                indices.append(
                                    list(range(int(bounds[0]) - 1,
                                               int(bounds[1]))))
                        elif ',' in item:
                            indices.append(
                                [int(i) - 1 for i in item.split(',')])
                        else:
                            indices.append(int(item) - 1)
                    except ValueError:
                        # Best effort: a malformed entry must not abort the
                        # remaining figures, but it should not go unnoticed.
                        LOGGER.warning('Projection specification {0} could '
                                       'not be parsed and is skipped.'.format(
                                           repr(item)))
                for index in indices:
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    if isinstance(index, Integral):
                        index = [index]
                    # File names carry the 1-based mode numbers.
                    save_figure('_proj_' + '_'.join(
                        str(i + 1) for i in index))
Exemplo n.º 4
0
def evol_coevol(msa, **kwargs):
    """Calculate mutual information for *msa* and write the results.

    *msa* is a file name that is parsed with :func:`prody.parseMSA`.  All
    of *kwargs* is forwarded to :func:`prody.buildMutinfoMatrix` (and to
    :func:`prody.calcShannonEntropy` when a normalization needs it).

    The plain mutual information matrix is always written.  One further
    matrix is written for every entry of the ``normalization`` and
    ``correction`` options; ``'joint'`` normalization is obtained by
    rebuilding the matrix with ``norm=True``, the other normalizations via
    :func:`prody.applyMutinfoNorm` and corrections via
    :func:`prody.applyMutinfoCorr`.

    Output names are ``prefix`` followed by a suffix naming the
    normalization or correction.  When ``prefix`` is not given it is
    derived from the *msa* file name without its extension (and without a
    trailing ``.gz``) plus ``'_mutinfo'``.  Each matrix is written as
    ``.txt`` using ``numformat``; a ``.hm`` heatmap is added when
    ``heatmap`` is set and a figure when ``figcoevol`` is set.  The figure
    options read are ``cmin``, ``cmax``, ``figwidth``, ``figheight``,
    ``xlabel``, ``title``, ``figformat`` and ``figdpi``.  Figures are
    skipped with a warning when Matplotlib cannot be imported.

    Returns **None**.
    """

    from numpy import arange

    import prody
    from prody import parseMSA, buildMutinfoMatrix, showMutinfoMatrix
    from prody import applyMutinfoCorr, calcShannonEntropy
    from prody import writeArray, LOGGER, applyMutinfoNorm, writeHeatmap
    from os.path import splitext

    prefix = kwargs.get('prefix')
    if prefix is None:
        prefix, extension = splitext(msa)
        if extension.lower() == '.gz':
            prefix, extension = splitext(prefix)
        prefix += '_mutinfo'

    msa = parseMSA(msa)
    mutinfo = buildMutinfoMatrix(msa, **kwargs)
    numformat = kwargs.get('numformat', '%12g')
    heatmap = kwargs.get('heatmap', False)
    hmargs = {}
    if heatmap:
        hmargs = {
                  'xlabel': 'Residue', 'ylabel': 'Residue',
                  'xorigin': 1, 'xstep': 1,
                  'residue': arange(msa.numResidues())}

    # Work list of (kind, name) pairs; (None, None) is the plain matrix.
    todo = [(None, None)]
    norm = kwargs.get('normalization', [])
    corr = kwargs.get('correction', [])
    if norm is not None:
        if 'joint' in norm:
            todo.append(('norm', 'joint'))
        # 'joint' has been queued above; skipping it here keeps it from
        # being calculated and written a second time.
        todo.extend(('norm', which) for which in norm if which != 'joint')
    if corr is not None:
        todo.extend(('corr', which) for which in corr)
    # Calculated lazily, at most once, for the non-joint normalizations.
    entropy = None

    for what, which in todo:
        if what is None:
            matrix = mutinfo
            suffix = ''
            tuffix = ' Mutual Information'
        elif which == 'joint':
            LOGGER.info('Applying {0} normalization.'.format(repr(which)))
            matrix = buildMutinfoMatrix(msa, norm=True, **kwargs)
            suffix = '_norm_joint'
            tuffix = ' MI - Normalization: ' + which
        elif what == 'norm':
            LOGGER.info('Applying {0} normalization.'.format(repr(which)))
            if entropy is None:
                entropy = calcShannonEntropy(msa, **kwargs)
            matrix = applyMutinfoNorm(mutinfo, entropy, norm=which)
            suffix = '_norm_' + which
            tuffix = ' MI - Normalization: ' + which
        else:
            LOGGER.info('Applying {0} correction.'.format(repr(which)))
            matrix = applyMutinfoCorr(mutinfo, which)
            suffix = '_corr_' + which
            tuffix = ' MI - Correction: ' + which

        writeArray(prefix + suffix + '.txt', matrix, format=numformat)

        if heatmap:
            writeHeatmap(prefix + suffix + '.hm', matrix,
                         title=msa.getTitle() + tuffix, **hmargs)

        if kwargs.get('figcoevol'):
            try:
                import matplotlib.pyplot as plt
            except ImportError:
                LOGGER.warning('Matplotlib could not be imported, '
                               'figures are not saved.')
            else:
                # The colour limits default to the range of this matrix.
                cmin = kwargs.get('cmin', matrix.min())
                cmax = kwargs.get('cmax', matrix.max())
                prody.SETTINGS['auto_show'] = False
                width = kwargs.get('figwidth', 8)
                height = kwargs.get('figheight', 6)
                xlabel = kwargs.get('xlabel')
                title = kwargs.get('title')
                figure = plt.figure(figsize=(width, height))
                showMutinfoMatrix(matrix, msa=msa, clim=(cmin, cmax),
                                  xlabel=xlabel, title=title)

                figformat = kwargs.get('figformat', 'pdf')
                figure.savefig(prefix + suffix + '.' + figformat,
                               format=figformat,
                               dpi=kwargs.get('figdpi', 300))
                # One figure is created per matrix; close it so that open
                # figures do not pile up over the loop.
                plt.close(figure)
Exemplo n.º 5
0
def prody_anm(pdb, **kwargs):
    """Perform ANM calculations for *pdb*.

    *pdb* is passed to :func:`prody.parsePDB` together with the ``model``
    option.  Options missing from *kwargs* are filled in from the
    module-level ``DEFAULTS`` mapping before they are read.

    All files are written into the ``outdir`` directory and their names
    start with ``prefix``; when ``prefix`` equals ``'_anm'`` the title of the
    parsed structure is prepended.  An NMD file is always written.
    ``outnpz`` additionally saves the model and ``extend`` writes an
    extended NMD file.  The other numerical files are enabled by ``outall``
    or individually by ``outeig``, ``outbeta``, ``outcov``, ``outcc``,
    ``outhm``, ``hessian``, ``kirchhoff`` and ``outsf``.  Figures are enabled
    by ``figall`` or individually by ``figcc``, ``figcmap``, ``figsf`` and
    ``figbeta``; they are skipped with a warning when Matplotlib cannot be
    imported.

    Returns **None**.  When the ``select`` string matches no atoms a warning
    is logged and the function returns without writing anything.

    :raises IOError: when ``outdir`` is not an existing directory
    """

    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import numpy as np
    import prody
    LOGGER = prody.LOGGER

    selstr = kwargs.get('select')
    prefix = kwargs.get('prefix')
    cutoff = kwargs.get('cutoff')
    gamma = kwargs.get('gamma')
    nmodes = kwargs.get('nmodes')
    model = kwargs.get('model')

    pdb = prody.parsePDB(pdb, model=model)
    if prefix == '_anm':
        prefix = pdb.getTitle() + '_anm'

    select = pdb.select(selstr)
    if select is None:
        LOGGER.warning('Selection {0} did not match any atoms.'.format(
            repr(selstr)))
        return
    LOGGER.info('{0} atoms will be used for ANM calculations.'.format(
        len(select)))

    anm = prody.ANM(pdb.getTitle())
    anm.buildHessian(select, cutoff, gamma)
    anm.calcModes(nmodes)
    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(anm, join(outdir, prefix))
    prody.writeNMD(join(outdir, prefix + '.nmd'), anm, select)

    extend = kwargs.get('extend')
    if extend:
        if extend == 'all':
            extended = prody.extendModel(anm, select, pdb)
        else:
            extended = prody.extendModel(anm, select, select | pdb.bb)
        prody.writeNMD(join(outdir, prefix + '_extended_' + extend + '.nmd'),
                       *extended)

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    numext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    def write_array(tag, array):
        # Save *array* as <outdir>/<prefix><tag><numext>.
        prody.writeArray(join(outdir, prefix + tag + numext),
                         array,
                         delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outeig'):
        write_array('_evectors', anm.getArray())
        write_array('_evalues', anm.getEigvals())

    if outall or kwargs.get('outbeta'):
        from prody.utilities import openFile
        fout = openFile(prefix + '_beta.txt', 'w', folder=outdir)
        # Close the handle even when formatting or writing a row fails.
        try:
            fout.write(
                '{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n'.format(
                    ['C', 'RES', '####', 'Exp.', 'The.']))
            for data in zip(select.getChids(), select.getResnames(),
                            select.getResnums(), select.getBetas(),
                            prody.calcTempFactors(anm, select)):
                fout.write(
                    '{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} {0[4]:5.2f}\n'.
                    format(data))
        finally:
            fout.close()

    if outall or kwargs.get('outcov'):
        write_array('_covariance', anm.getCovariance())

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # The cross-correlation matrix is shared by the text and heatmap
        # outputs, so it is computed once.
        crosscorr = prody.calcCrossCorr(anm)
        if outall or kwargs.get('outcc'):
            write_array('_cross-correlations', crosscorr)
        if outall or kwargs.get('outhm'):
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               crosscorr,
                               resnum=select.getResnums(),
                               xlabel='Residue',
                               ylabel='Residue',
                               title=anm.getTitle() + ' cross-correlations')

    if outall or kwargs.get('hessian'):
        write_array('_hessian', anm.getHessian())

    if outall or kwargs.get('kirchhoff'):
        write_array('_kirchhoff', anm.getKirchhoff())

    if outall or kwargs.get('outsf'):
        write_array('_sqflucts', prody.calcSqFlucts(anm))

    figall = kwargs.get('figall')
    figcc = kwargs.get('figcc')
    figsf = kwargs.get('figsf')
    figbeta = kwargs.get('figbeta')
    figcmap = kwargs.get('figcmap')

    if figall or figcc or figsf or figbeta or figcmap:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def save_figure(tag):
                # Write the current figure as <prefix><tag>.<figformat> and
                # close every open figure afterwards.
                plt.savefig(join(outdir, prefix + tag + '.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')

            if figall or figcc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(anm)
                save_figure('_cc')

            if figall or figcmap:
                plt.figure(figsize=(width, height))
                prody.showContactMap(anm)
                save_figure('_cm')

            if figall or figsf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(anm)
                save_figure('_sf')

            if figall or figbeta:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(anm, select)
                plt.plot(bexp, label='Experimental')
                # The legend carries the correlation between the calculated
                # and experimental values.
                plt.plot(bcal,
                         label=('Theoretical (R={0:.2f})'.format(
                             np.corrcoef(bcal, bexp)[0, 1])))
                plt.legend(prop={'size': 10})
                plt.xlabel('Node index')
                plt.ylabel('Experimental B-factors')
                plt.title(pdb.getTitle() + ' B-factors')
                save_figure('_bf')
Exemplo n.º 6
0
def prody_gnm(pdb, **kwargs):
    """Perform GNM calculations for *pdb*.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping.  *pdb* is passed as-is to :func:`prody.parsePDB`;
    atoms matching the ``select`` option are used to build the Kirchhoff
    matrix and to calculate modes.  An NMD file is always written; other
    numerical and graphical outputs are written on request.  All files go
    into ``outdir`` and their names start with ``prefix``.

    Recognized options (keys of *kwargs*):

    * ``outdir`` -- existing directory that receives all output files
    * ``select`` -- selection string passed to ``pdb.select``
    * ``prefix`` -- output file name prefix; the value ``"_gnm"`` is
      replaced with the structure title followed by ``"_gnm"``
    * ``cutoff``, ``gamma`` -- passed to ``GNM.buildKirchhoff``
    * ``nmodes`` -- passed to ``GNM.calcModes``
    * ``model`` -- passed to :func:`prody.parsePDB`
    * ``outnpz`` -- also save the model with :func:`prody.saveModel`
    * ``extend`` -- when set, write a second NMD file for the model
      extended to the whole structure (``"all"``) or, for any other value,
      to ``select | pdb.bb``
    * ``outall`` -- write every numerical output listed below
    * ``outeig``, ``outbeta``, ``outcov``, ``outcc``, ``outhm``,
      ``kirchhoff``, ``outsf`` -- write the individual numerical outputs
    * ``numdelim``, ``numext``, ``numformat`` -- delimiter, file extension
      and number format used for array output
    * ``figall``, ``figcc``, ``figcmap``, ``figsf``, ``figbeta`` -- save
      the corresponding figures (requires Matplotlib)
    * ``figmode`` -- string of 1-based mode numbers and ranges separated by
      whitespace and/or commas (e.g. ``"1-3,5"``); one figure per mode
    * ``figformat``, ``figwidth``, ``figheight``, ``figdpi`` -- figure file
      format, size and resolution

    :raises IOError: when ``outdir`` is not an existing directory
    :raises ValueError: when the selection does not match any atoms
    """

    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, join

    outdir = kwargs.get("outdir")
    if not isdir(outdir):
        raise IOError("{0} is not a valid path".format(repr(outdir)))

    import numpy as np
    import prody

    LOGGER = prody.LOGGER

    selstr = kwargs.get("select")
    prefix = kwargs.get("prefix")
    cutoff = kwargs.get("cutoff")
    gamma = kwargs.get("gamma")
    nmodes = kwargs.get("nmodes")
    model = kwargs.get("model")

    pdb = prody.parsePDB(pdb, model=model)
    if prefix == "_gnm":
        prefix = pdb.getTitle() + "_gnm"

    select = pdb.select(selstr)
    if select is None:
        raise ValueError("selection {0} do not match any atoms".format(repr(selstr)))
    LOGGER.info("{0} atoms will be used for GNM calculations.".format(len(select)))

    gnm = prody.GNM(pdb.getTitle())
    gnm.buildKirchhoff(select, cutoff, gamma)
    gnm.calcModes(nmodes)

    LOGGER.info("Writing numerical output.")

    if kwargs.get("outnpz"):
        prody.saveModel(gnm, join(outdir, prefix))

    prody.writeNMD(join(outdir, prefix + ".nmd"), gnm, select)

    extend = kwargs.get("extend")
    if extend:
        if extend == "all":
            extended = prody.extendModel(gnm, select, pdb)
        else:
            extended = prody.extendModel(gnm, select, select | pdb.bb)
        prody.writeNMD(join(outdir, prefix + "_extended_" + extend + ".nmd"), *extended)

    outall = kwargs.get("outall")
    delim = kwargs.get("numdelim")
    ext = kwargs.get("numext")
    # Named ``numformat`` rather than ``format`` so the builtin is not shadowed
    # and the numeric format cannot be confused with the figure format below.
    numformat = kwargs.get("numformat")

    if outall or kwargs.get("outeig"):
        prody.writeArray(join(outdir, prefix + "_evectors" + ext), gnm.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + "_evalues" + ext), gnm.getEigvals(), delimiter=delim, format=numformat)

    if outall or kwargs.get("outbeta"):
        from prody.utilities import openFile

        fout = openFile(prefix + "_beta.txt", "w", folder=outdir)
        # try/finally guarantees the handle is closed even when computing or
        # formatting the temperature factors fails part-way through.
        try:
            fout.write(
                "{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n".format(["C", "RES", "####", "Exp.", "The."])
            )
            for data in zip(
                select.getChids(),
                select.getResnames(),
                select.getResnums(),
                select.getBetas(),
                prody.calcTempFactors(gnm, select),
            ):
                fout.write("{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} {0[4]:5.2f}\n".format(data))
        finally:
            fout.close()

    if outall or kwargs.get("outcov"):
        prody.writeArray(
            join(outdir, prefix + "_covariance" + ext), gnm.getCovariance(), delimiter=delim, format=numformat
        )

    if outall or kwargs.get("outcc") or kwargs.get("outhm"):
        # The cross-correlation matrix is computed once and shared by both
        # the plain-array and the heatmap output.
        cross_corr = prody.calcCrossCorr(gnm)
        if outall or kwargs.get("outcc"):
            prody.writeArray(
                join(outdir, prefix + "_cross-correlations" + ext), cross_corr, delimiter=delim, format=numformat
            )
        if outall or kwargs.get("outhm"):
            prody.writeHeatmap(
                join(outdir, prefix + "_cross-correlations.hm"),
                cross_corr,
                resnum=select.getResnums(),
                xlabel="Residue",
                ylabel="Residue",
                title=gnm.getTitle() + " cross-correlations",
            )

    if outall or kwargs.get("kirchhoff"):
        prody.writeArray(
            join(outdir, prefix + "_kirchhoff" + ext), gnm.getKirchhoff(), delimiter=delim, format=numformat
        )

    if outall or kwargs.get("outsf"):
        prody.writeArray(
            join(outdir, prefix + "_sqfluct" + ext), prody.calcSqFlucts(gnm), delimiter=delim, format=numformat
        )

    figall = kwargs.get("figall")
    cc = kwargs.get("figcc")
    sf = kwargs.get("figsf")
    bf = kwargs.get("figbeta")
    cm = kwargs.get("figcmap")
    modes = kwargs.get("figmode")

    if figall or cc or sf or bf or cm or modes:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning("Matplotlib could not be imported. " "Figures are not saved.")
        else:
            prody.SETTINGS["auto_show"] = False
            LOGGER.info("Saving graphical output.")
            figformat = kwargs.get("figformat").lower()
            width = kwargs.get("figwidth")
            height = kwargs.get("figheight")
            dpi = kwargs.get("figdpi")

            def save_figure(suffix):
                """Save the current figure as ``prefix + suffix`` and close all figures."""
                plt.savefig(join(outdir, prefix + suffix + "." + figformat), dpi=dpi, format=figformat)
                plt.close("all")

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(gnm)
                save_figure("_cc")

            if figall or cm:
                plt.figure(figsize=(width, height))
                prody.showContactMap(gnm)
                save_figure("_cm")

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(gnm)
                save_figure("_sf")

            if figall or bf:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(gnm, select)
                plt.plot(bexp, label="Experimental")
                plt.plot(bcal, label=("Theoretical (corr coef = {0:.2f})".format(np.corrcoef(bcal, bexp)[0, 1])))
                plt.legend(prop={"size": 10})
                plt.xlabel("Node index")
                plt.ylabel("Experimental B-factors")
                plt.title(pdb.getTitle() + " B-factors")
                save_figure("_bf")

            if modes:
                # Mode numbers are 1-based for the user; convert every
                # "N" or "N-M" item into 0-based indices.
                # NOTE(review): a mode number of 0 yields index -1; whether
                # gnm[-1] is accepted (e.g. as the last mode) is not visible
                # here -- confirm this is intended.
                indices = []
                items = [part for chunk in modes.split() for part in chunk.split(",")]
                for item in items:
                    bounds = item.split("-")
                    try:
                        if len(bounds) == 1:
                            first = last = int(bounds[0])
                        elif len(bounds) == 2:
                            first, last = int(bounds[0]), int(bounds[1])
                        else:
                            raise ValueError(item)
                    except ValueError:
                        # Best effort: a malformed item is reported and
                        # skipped instead of aborting the remaining figures.
                        LOGGER.warning("Skipping invalid mode specification {0}.".format(repr(item)))
                        continue
                    indices.extend(range(first - 1, last))

                for index in indices:
                    try:
                        mode = gnm[index]
                    except Exception:
                        # The exception type raised for an unavailable mode
                        # is defined by ProDy and not visible here, so catch
                        # broadly -- but, unlike a bare ``except``, let
                        # KeyboardInterrupt/SystemExit propagate.
                        LOGGER.warning("Mode {0} is not available, figure is not saved.".format(index + 1))
                        continue
                    plt.figure(figsize=(width, height))
                    prody.showMode(mode)
                    plt.grid()
                    save_figure("_mode_" + str(mode.getIndex() + 1))
Exemplo n.º 7
0
def prody_gnm(pdb, **kwargs):
    """Perform GNM calculations for *pdb*.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping.  *pdb* is passed as-is to :func:`prody.parsePDB`;
    atoms matching the ``select`` option are used to build the Kirchhoff
    matrix and to calculate modes.  An NMD file is always written; other
    numerical and graphical outputs are written on request.  All files go
    into ``outdir`` and their names start with ``prefix``.

    Recognized options (keys of *kwargs*):

    * ``outdir`` -- existing directory that receives all output files
    * ``select`` -- selection string passed to ``pdb.select``
    * ``prefix`` -- output file name prefix; the value ``'_gnm'`` is
      replaced with the structure title followed by ``'_gnm'``
    * ``cutoff``, ``gamma`` -- passed to ``GNM.buildKirchhoff``
    * ``nmodes``, ``zeros`` -- passed to ``GNM.calcModes``
    * ``model``, ``altloc`` -- passed to :func:`prody.parsePDB`
    * ``nproc`` -- when set, the calculation runs inside
      ``threadpool_limits(limits=nproc, user_api="blas")``
    * ``outnpz`` -- also save the model with :func:`prody.saveModel`
    * ``outscipion`` -- also call ``prody.writeScipionModes(outdir, gnm)``
    * ``extend`` -- when set, write a second NMD file for the model
      extended to the whole structure (``'all'``) or, for any other value,
      to ``select | pdb.bb``
    * ``outall`` -- write every numerical output listed below
    * ``outeig``, ``outbeta``, ``outcov``, ``outcc``, ``outhm``,
      ``kirchhoff``, ``outsf`` -- write the individual numerical outputs
    * ``numdelim``, ``numext``, ``numformat`` -- delimiter, file extension
      and number format used for array output
    * ``figall``, ``figcc``, ``figcmap``, ``figsf``, ``figbeta`` -- save
      the corresponding figures (requires Matplotlib)
    * ``figmode`` -- string of 1-based mode numbers and ranges separated by
      whitespace and/or commas (e.g. ``'1-3,5'``); one figure per mode
    * ``figformat``, ``figwidth``, ``figheight``, ``figdpi`` -- figure file
      format, size and resolution

    :raises IOError: when ``outdir`` is not an existing directory
    :raises ValueError: when the selection does not match any atoms
    :raises ImportError: when ``nproc`` is set but ``threadpoolctl`` cannot
        be imported
    """

    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import numpy as np
    import prody
    LOGGER = prody.LOGGER

    selstr = kwargs.get('select')
    prefix = kwargs.get('prefix')
    cutoff = kwargs.get('cutoff')
    gamma = kwargs.get('gamma')
    nmodes = kwargs.get('nmodes')
    model = kwargs.get('model')
    altloc = kwargs.get('altloc')
    zeros = kwargs.get('zeros')

    pdb = prody.parsePDB(pdb, model=model, altloc=altloc)
    if prefix == '_gnm':
        prefix = pdb.getTitle() + '_gnm'

    select = pdb.select(selstr)
    if select is None:
        raise ValueError('selection {0} do not match any atoms'.format(
            repr(selstr)))
    LOGGER.info('{0} atoms will be used for GNM calculations.'.format(
        len(select)))

    gnm = prody.GNM(pdb.getTitle())

    nproc = kwargs.get('nproc')
    if nproc:
        try:
            from threadpoolctl import threadpool_limits
        except ImportError:
            raise ImportError(
                'Please install threadpoolctl to control threads')

        # Only the matrix construction and mode calculation run under the
        # BLAS thread limit; output writing below is unaffected.
        with threadpool_limits(limits=nproc, user_api="blas"):
            gnm.buildKirchhoff(select, cutoff, gamma)
            gnm.calcModes(nmodes, zeros=zeros)
    else:
        gnm.buildKirchhoff(select, cutoff, gamma)
        gnm.calcModes(nmodes, zeros=zeros)

    LOGGER.info('Writing numerical output.')

    if kwargs.get('outnpz'):
        prody.saveModel(gnm, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, gnm)

    prody.writeNMD(join(outdir, prefix + '.nmd'), gnm, select)

    extend = kwargs.get('extend')
    if extend:
        if extend == 'all':
            extended = prody.extendModel(gnm, select, pdb)
        else:
            extended = prody.extendModel(gnm, select, select | pdb.bb)
        prody.writeNMD(join(outdir, prefix + '_extended_' + extend + '.nmd'),
                       *extended)

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    # Named ``numformat`` rather than ``format`` so the builtin is not shadowed
    # and the numeric format cannot be confused with the figure format below.
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + ext),
                         gnm.getArray(),
                         delimiter=delim,
                         format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + ext),
                         gnm.getEigvals(),
                         delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outbeta'):
        from prody.utilities import openFile
        fout = openFile(prefix + '_beta' + ext, 'w', folder=outdir)
        # try/finally guarantees the handle is closed even when computing or
        # formatting the temperature factors fails part-way through.
        try:
            fout.write(
                '{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n'.format(
                    ['C', 'RES', '####', 'Exp.', 'The.']))
            for data in zip(select.getChids(), select.getResnames(),
                            select.getResnums(), select.getBetas(),
                            prody.calcTempFactors(gnm, select)):
                fout.write(
                    '{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} {0[4]:5.2f}\n'.
                    format(data))
        finally:
            fout.close()

    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + ext),
                         gnm.getCovariance(),
                         delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # The cross-correlation matrix is computed once and shared by both
        # the plain-array and the heatmap output.
        cross_corr = prody.calcCrossCorr(gnm)
        if outall or kwargs.get('outcc'):
            prody.writeArray(join(outdir,
                                  prefix + '_cross-correlations' + ext),
                             cross_corr,
                             delimiter=delim,
                             format=numformat)
        if outall or kwargs.get('outhm'):
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               cross_corr,
                               resnum=select.getResnums(),
                               xlabel='Residue',
                               ylabel='Residue',
                               title=gnm.getTitle() + ' cross-correlations')

    if outall or kwargs.get('kirchhoff'):
        prody.writeArray(join(outdir, prefix + '_kirchhoff' + ext),
                         gnm.getKirchhoff(),
                         delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + ext),
                         prody.calcSqFlucts(gnm),
                         delimiter=delim,
                         format=numformat)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    bf = kwargs.get('figbeta')
    cm = kwargs.get('figcmap')
    modes = kwargs.get('figmode')

    if figall or cc or sf or bf or cm or modes:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            def save_figure(suffix):
                """Save the current figure as ``prefix + suffix`` and close
                all figures."""
                plt.savefig(join(outdir, prefix + suffix + '.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(gnm)
                save_figure('_cc')

            if figall or cm:
                plt.figure(figsize=(width, height))
                prody.showContactMap(gnm)
                save_figure('_cm')

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(gnm)
                save_figure('_sf')

            if figall or bf:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(gnm, select)
                plt.plot(bexp, label='Experimental')
                plt.plot(bcal,
                         label=('Theoretical (corr coef = {0:.2f})'.format(
                             np.corrcoef(bcal, bexp)[0, 1])))
                plt.legend(prop={'size': 10})
                plt.xlabel('Node index')
                plt.ylabel('Experimental B-factors')
                plt.title(pdb.getTitle() + ' B-factors')
                save_figure('_bf')

            if modes:
                # Mode numbers are 1-based for the user; convert every
                # 'N' or 'N-M' item into 0-based indices.
                # NOTE(review): a mode number of 0 yields index -1; whether
                # gnm[-1] is accepted (e.g. as the last mode) is not visible
                # here -- confirm this is intended.
                indices = []
                items = [part
                         for chunk in modes.split()
                         for part in chunk.split(',')]
                for item in items:
                    bounds = item.split('-')
                    try:
                        if len(bounds) == 1:
                            first = last = int(bounds[0])
                        elif len(bounds) == 2:
                            first, last = int(bounds[0]), int(bounds[1])
                        else:
                            raise ValueError(item)
                    except ValueError:
                        # Best effort: a malformed item is reported and
                        # skipped instead of aborting the remaining figures.
                        LOGGER.warning(
                            'Skipping invalid mode specification {0}.'.format(
                                repr(item)))
                        continue
                    indices.extend(range(first - 1, last))

                for index in indices:
                    try:
                        mode = gnm[index]
                    except Exception:
                        # The exception type raised for an unavailable mode
                        # is defined by ProDy and not visible here, so catch
                        # broadly -- but, unlike a bare ``except``, let
                        # KeyboardInterrupt/SystemExit propagate.
                        LOGGER.warning(
                            'Mode {0} is not available, figure is not saved.'
                            .format(index + 1))
                        continue
                    plt.figure(figsize=(width, height))
                    prody.showMode(mode)
                    plt.grid()
                    save_figure('_mode_' + str(mode.getIndex() + 1))