Python Plots.annotate示例，popgen.Plots.annotate Python示例

示例#1

0

显示文件

文件： LD.py 项目： airanmehr/bio

def plotScalingFactor():
    """Save one two-panel figure per (nu0, s) combination.

    The upper panel shows ``nu(t, s, nu0)``; the lower panel shows
    ``log(H(t) / H(0))``, the linear term ``-2 * r * t * l`` and their sum,
    with a dashed vertical line at the generation (excluding the first row)
    where the absolute value of the sum is smallest.  Figures are written via
    ``pplt.savefig`` as ``decayFactors0`` .. ``decayFactors3``.
    """
    recomb = 2 * 1e-8
    span = 5e4
    dpi = 300
    growthLabel = 'log(Growth)'
    decayLabel = r'log(Decay)'
    totalLabel = 'log(Growth) + log(Decay)'

    # Same visiting order as two nested loops: nu0 outermost, s innermost.
    grid = [(nu0, s) for nu0 in [0.005, 0.1] for s in [0.025, 0.1]]
    for figNo, (nu0, s) in enumerate(grid):
        horizon = 2 * (utl.logit(0.995) - utl.logit(nu0)) / s + 1.
        t = np.arange(0, horizon, 1)
        fig, panels = plt.subplots(2, 1, figsize=(5.5, 2.5), dpi=dpi, sharex=True)
        top, bottom = panels[0], panels[1]

        # Upper panel: the trajectory itself.
        sweepKind = 'Hard' if nu0 == 0.005 else 'Soft'
        nu(t, s=s, nu0=nu0).plot(color='k', legend=False, ax=top)
        pplt.annotate(r'$s$={}, $\nu_0=${} ({} Sweep)'.format(s, nu0, sweepKind), fontsize=7,
                      ax=top)
        pplt.setSize(ax=top, fontsize=6)
        top.set_ylabel(r'$\nu_t$')

        # Lower panel: the two log terms and their sum.
        H0 = H(t[0], s=s, nu0=nu0)
        Ht = H(t, s=s, nu0=nu0)
        rows = [np.log(Ht / H0), -2 * recomb * t * span]
        factors = pd.DataFrame(rows, columns=t, index=[growthLabel, decayLabel]).T
        factors[totalLabel] = factors.sum(1)
        factors.plot(ax=bottom, grid=True, linewidth=2)
        bottom.set_xlabel('Generations')
        bottom.set_ylabel('Log(Scaling Factor)')
        crossing = factors.iloc[1:, 2].abs().idxmin()
        bottom.axvline(crossing, color='k', linestyle='--', linewidth=0.5)
        bottom.legend([growthLabel, decayLabel, totalLabel], bbox_to_anchor=(1.45, .75),
                      prop={'size': 6})
        pplt.setSize(bottom, fontsize=6)

        # Leave the right 30% of the canvas free for the out-of-axes legend.
        plt.tight_layout(pad=0.1, rect=[0, 0, 0.7, 1])
        plt.gcf().subplots_adjust(bottom=0.15)
        pplt.savefig('decayFactors{}'.format(figNo), dpi=dpi)

示例#2

0

显示文件

文件： Depth.py 项目： airanmehr/bio

def plotDepth():
    """Draw a 2x2 summary of read depth and save it as 'depth'.

    Reads ``utl.outpath + 'real/CD.F59.df'`` and keeps the 'D' slice of the
    'READ' column level.  Panels:

    (A) counts of every depth value over all cells of the table,
    (B) the cumulative sum of (A),
    (C) counts of the per-row minimum depth,
    (D) the cumulative sum of (C).

    Each panel gets a dashed vertical line at depth 50.  The figure is saved
    through ``pplt.savefig('depth', 300)`` and then shown.

    Changes relative to the earlier version: the y-label typo "Measurments"
    is fixed, two expressions whose results were discarded are removed, and
    the repeated per-panel calls are factored into ``finishPanel``.
    """
    sns.set_style("whitegrid", {"grid.color": "1", 'axes.linewidth': .5, "grid.linewidth": ".09"})
    sns.set_context("notebook", font_scale=1.4, rc={"lines.linewidth": 2.5})
    d = pd.read_pickle(utl.outpath + 'real/CD.F59.df').xs('D', level='READ', axis=1)

    z = pd.Series(np.ndarray.flatten(d.values))
    depthCounts = z.value_counts().sort_index()
    minDepthCounts = d.min(1).value_counts().sort_index()
    fontsize = 6
    mpl.rcParams.update({'font.size': fontsize})

    def finishPanel(label):
        # Panel letter, depth-50 marker and font sizing for the current axes.
        pplt.annotate(label, xpad=0.85, ypad=0.45, fontsize=fontsize)
        plt.axvline(50, linestyle='--', linewidth=1, color='k')
        pplt.setSize(plt.gca(), fontsize)

    plt.figure(figsize=(6, 4), dpi=300)

    # (A) depth histogram over all measurements
    plt.subplot(2, 2, 1)
    depthCounts.plot()
    plt.xlim([0, 200])
    plt.xlabel('Depth')
    plt.ylabel('Number of Measurements' + '\n (out of {:.1f}M)'.format(z.shape[0] / 1e6))
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    plt.title('Scaled PDF')
    finishPanel('(A)')

    # (B) cumulative version of (A)
    plt.subplot(2, 2, 2)
    depthCounts.cumsum().plot()
    plt.xlim([0, 200])
    plt.ylim([-3e5, 2.05 * 1e7])
    plt.xlabel('Depth')
    plt.title('Scaled CDF')
    finishPanel('(B)')

    # (C) histogram of the minimum depth of each row
    plt.subplot(2, 2, 3)
    minDepthCounts.plot()
    plt.xlim([0, 100])
    plt.xlabel('Minimum Depth of each Variant')
    plt.ylabel('Number of Variants' + '\n (out of {:.1f}M)'.format(d.shape[0] / 1e6))
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    plt.rc('font', size=fontsize)
    finishPanel('(C)')

    # (D) cumulative version of (C); only the lower y-limit is overridden
    plt.subplot(2, 2, 4)
    minDepthCounts.cumsum().plot()
    plt.xlim([0, 60])
    plt.ylim([0.25 * -1e5, plt.ylim()[1]])
    plt.xlabel('Minimum Depth of each Variant')
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    finishPanel('(D)')

    plt.gcf().subplots_adjust(bottom=0.15)
    plt.gcf().tight_layout(h_pad=0.1)
    # NOTE(review): these rc changes happen after all artists were created and
    # the last one sets the global font size to 1 -- kept as-is; confirm intent.
    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': fontsize})
    mpl.rc('text', usetex=True)
    mpl.rcParams.update({'font.size': 1})

    pplt.savefig('depth', 300)
    plt.show()

示例#3

0

显示文件

文件： MarkovBrownian.py 项目： airanmehr/bio

def plot():
    """Draw a 4x3 grid of distributions and save it as 'markovDists'.

    Rows iterate over ``itertools.product([0, 0.1], [0.005, 0.1])`` as
    ``(s, nu0)``; columns iterate over ``tau`` in 1, 10, 100.  Each panel
    overlays the stored 'markov' entry of
    ``utl.outpath + 'markov/simulations/plotData.df'``, the result of
    ``getBrownian`` (blanked whenever ``s`` is non-zero) and the result of
    ``getObservation``.

    ``color``, ``getObservation`` and ``getBrownian`` are resolved from the
    enclosing module.
    """
    fontsize = 5

    def plotOne(x, ax):
        # A None entry marks a curve that was deliberately blanked out.
        if x is None:
            return
        # Unnamed series are drawn thin and opaque, named ones thick and
        # slightly transparent.
        if x.name is None:
            lw, alpha = 0.7, 1
        else:
            lw, alpha = 3, 0.8
        x.plot(ax=ax, color=color[x.name], lw=lw, alpha=alpha)

    fig, axes = plt.subplots(4, 3, figsize=(7, 3.9), dpi=300)
    df = pd.read_pickle(utl.outpath + 'markov/simulations/plotData.df')
    # NOTE(review): the panel letters skip 'J' (…GHI, KLM) -- confirm whether
    # that is intended before changing the labels.
    ABC = [list('ABC'), list('DEF'), list('GHI'), list('KLM')]
    for (s, nu0), axr, titles in zip(itertools.product([0, 0.1], [0.005, 0.1]), axes, ABC):
        for tau, ax, title in zip([1, 10, 100], axr, titles):
            observation = getObservation(nu0, s, tau)
            x = observation.index.values
            brownian = getBrownian(x=x, nu0=nu0, tau=tau, mu=nu0)
            markov = df[(nu0, s, tau)].loc['markov']
            df[(nu0, s, tau)] = pd.Series([observation, markov, brownian],
                                          index=['observation', 'markov', 'brownian']).rename((nu0, s, tau))
            # The Brownian curve is only drawn when s == 0.
            if s: df[(nu0, s, tau)].loc['brownian'] = None
            df[(nu0, s, tau)].loc[['markov', 'brownian', 'observation']].apply(lambda x: plotOne(x, ax))
            if nu0 == 0.005 and tau == 100: ax.set_xlim([0, 0.02])
            ax.locator_params(nbins=1, axis='y')
            if nu0 == 0.005 and tau == 100 and s == 0:
                ax.legend(['Markov Chain', 'Brownian Motion', 'Empirical Distribution'], fontsize=fontsize)

            # Keep every second x tick to reduce clutter.
            ax.set_xticks(ax.get_xticks()[::2])
            pplt.annotate('(' + title + ')', fontsize=fontsize, ax=ax)
            pplt.setSize(ax, fontsize)
        axr[0].set_ylabel(r'$P(\nu_\tau|\nu_0={},s={}$)'.format(nu0, s), fontsize=fontsize + 2, rotation=0, labelpad=30)

    # Column titles on the top row, x labels on the bottom row.
    for tau, ax in zip([1, 10, 100], axes[0]):
        ax.set_title(r'$\tau={}$'.format(tau), fontsize=fontsize)
    for ax in axes[-1]:
        ax.set_xlabel(r'$\nu$', fontsize=fontsize)

    plt.gcf().tight_layout(pad=0.1, rect=[0.05, 0, 1, 1])
    pplt.savefig('markovDists', 300)
    plt.show()

示例#4

0

显示文件

文件： Plot.py 项目： airanmehr/bio

def plotPowerCLR(recompute=False):
    if recompute:
        mc = pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'MarkovChain'))
        hmm = f(pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'HMM')))
        a = pd.concat([mc, hmm]);
        print a
        a = a[a.index.get_level_values('coverage') != np.inf]
        df = pd.DataFrame(a.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(Qcoverage[x.name[0]])].mean()))[0]
        # df = pd.DataFrame(a.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(0.99)].mean()))
        df = getPower(df, groupbyLevels=range(4))
        df.to_pickle(utl.outpath + 'ROC/PowerCLR.df')
    else:
        df = pd.read_pickle(utl.outpath + 'ROC/PowerCLR.df')
        reload(pplt)
    info = pplt.getNameColorMarker(df)
    info.loc[info.index.get_level_values('method') == 'HMM', 'marker'] = '--o'
    info.loc[info.index.get_level_values('method') == 'MarkovChain', 'marker'] = '--s'
    info.loc[info.index.get_level_values('method') == 'HMM', 'color'] = 'r'
    info.loc[info.index.get_level_values('method') == 'MarkovChain', 'color'] = 'darkblue'
    # info.loc[info.index.get_level_values('q')==0.99,'color']='r'
    # info.loc[info.index.get_level_values('q')==1,'color']='darkblue'
    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=dpi);
    pplt.setStyle(lw=1);
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard');
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft');
    [pplt.annotate('({})'.format(list('ABCDEF')[j]), ax=x, fontsize=7) for j, x in enumerate(axes.reshape(-1))]
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('powerCLR', dpi=dpi)
    plt.show()

示例#5

0

显示文件

文件： Plot.py 项目： airanmehr/bio

def plotPower(recompute=False):
    """Plot power per method in a 2x3 grid and export average-power tables.

    :param recompute: when true, rebuild the power table from the pickles
        ``ROC/FIT``, ``ROC/CMH``, ``ROC/GP`` (under ``utl.outpath``) and from
        ``loadHMMAllDepths()``, then store it in ``ROC/Power.df``; otherwise
        load ``ROC/Power.df``.

    Side effects: saves the figure through ``pplt.savefig('power', ...)``,
    pickles ``ROC/avgPower.df`` and writes two LaTeX tables via
    ``utl.DataframetolaTexTable``.

    ``f``, ``Qcoverage``, ``getPower``, ``loadHMMAllDepths``, ``fixComaleName``
    and ``fixColor`` are resolved from the enclosing module.
    """
    if recompute:
        # Keep rows whose 'causal' level is True or whose 'label' level is -1.
        causal = lambda x: x[(x.index.get_level_values('causal') == True) | (x.index.get_level_values('label') == -1)]
        FIT = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT'];
        # Missing FIT values are replaced by draws from np.random.rand, so a
        # recompute is not reproducible unless the caller seeds NumPy.
        FIT[FIT.isnull()] = np.random.rand(FIT.isnull().sum())
        CMH = causal(pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH'].fillna(0))
        GP = causal(pd.read_pickle(utl.outpath + 'ROC/GP').LR)
        HMM = f(loadHMMAllDepths())
        # HMM = (HMM.alt - HMM.null) ;HMM = HMM.groupby(level=range(6)).mean()
        # HMM = HMM.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(0.99)].mean())
        # HMM: per group, mean of the values at or above the quantile that
        # Qcoverage assigns to the group's first key.  The other methods use
        # the per-group maximum.
        HMM = HMM.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(Qcoverage[x.name[0]])].mean())
        GP = GP.groupby(level=range(6)).max()
        FIT = FIT.groupby(level=range(6)).max();  # dont move this line!
        CMH = CMH.groupby(level=range(6)).max();
        df = getPower(pd.concat([GP, HMM, FIT, CMH]), range(4)).sort_index()
        df.to_pickle(utl.outpath + 'ROC/Power.df')
    else:
        df = pd.read_pickle(utl.outpath + 'ROC/Power.df')
    # Rows with infinite coverage are excluded from the figure and the tables.
    df = df[df.index.get_level_values('coverage') != np.inf]
    df = fixComaleName(df)
    info = fixColor(pplt.getNameColorMarker(df))
    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=pplt.PLOS.dpi);
    pplt.setStyle(lw=1);
    reload(pplt)
    # Top row: nu0 = 0.005 ('Hard'); bottom row: nu0 = 0.1 ('Soft').
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard', panel=list('ABC'));
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft', panel=list('DEF'));
    [pplt.annotate('({})'.format(list('ABCDEF')[j]), ax=x, fontsize=7) for j, x in enumerate(axes.reshape(-1))]
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('power', pplt.PLOS.dpi)
    # Average over everything except the first three index levels, once for
    # the pickle and once for the LaTeX tables.
    df.groupby(level=range(3)).mean().unstack('method').to_pickle(utl.outpath + 'ROC/avgPower.df')
    csv = df.groupby(level=range(3)).mean().reset_index()
    # csv.replace({'HMM': comaleName}, inplace=True)
    csv.replace({np.inf: r'$\infty$'}, inplace=True)
    csv.nu0.replace({0.005: 'Hard', 0.1: 'Soft'}, inplace=True)
    csv.columns = [r'$\lambda$', 'Sweep', 'Method', 'Avg Power']
    csv.sort_values([r'$\lambda$', 'Sweep', 'Avg Power'], ascending=False, inplace=True)
    csv['Avg Power'] = csv['Avg Power'].round().astype(int)
    csv = csv.set_index(['Sweep'])
    # Cast every non-string lambda value to int (strings are left untouched).
    i = csv[r'$\lambda$'].apply(lambda x: not isinstance(x, str))
    csv.loc[i, r'$\lambda$'] = csv.loc[i, r'$\lambda$'].astype(int)
    soft = csv.loc['Soft'].sort_values([r'$\lambda$', 'Avg Power'], ascending=False)
    hard = csv.loc['Hard'].sort_values([r'$\lambda$', 'Avg Power'], ascending=False)
    # NOTE(review): the hard-sweep file is spelled 'powerHardMathods.tex' while
    # the soft-sweep file uses 'Methods' -- confirm what the consumers of these
    # tables expect before renaming.
    utl.DataframetolaTexTable(hard, fname=utl.paperFiguresPath + '../tables/powerHardMathods.tex')
    utl.DataframetolaTexTable(soft, fname=utl.paperFiguresPath + '../tables/powerSoftMethods.tex')
    plt.show()

示例#6

0

显示文件

文件： Plot.py 项目： airanmehr/bio

def plotBias():
    """Draw violin plots of estimation bias for depths 30, 100 and 300.

    For every depth a 2x2 figure is produced from ``ROC/bias.df`` (top row)
    and ``ROC/bias.h.df`` (bottom row) under ``utl.outpath``; the columns are
    nu0 = 0.005 and nu0 = 0.1.  A LaTeX table with the mean and standard
    deviation per (method, nu0) is written to ``tables/bias.<depth>.tex`` and
    the figure is saved as ``bias.<depth>``.

    ``comaleName`` and ``ABCD`` are not defined in this function and must be
    provided by the enclosing module.
    """
    # NOTE: computeBias and biash are defined here but never called in this
    # function (the computeBias call below is commented out); the pickles they
    # write are expected to exist already.
    def computeBias():  # s-shat
        print 'computing bias...'
        a = pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'HMM')).s
        gp = pd.read_pickle(utl.outpath + 'ROC/GP.causal').s
        b = pd.concat([a, gp]).sort_index().xs(True, level='causal')

        # Bias = last key of the group (x.name[-1]) minus the estimate.
        bias = b.groupby(level=range(4)).apply(lambda x: x.name[-1] - x)
        bias.to_pickle('{}ROC/bias.df'.format(utl.outpath))

    def biash():
        # Builds bias.h.df from the 'HMM' rows of bias.df plus Gaussian noise.
        a = pd.read_pickle('{}ROC/bias.df'.format(utl.outpath))
        a = a[a.index.get_level_values('method') == 'HMM']
        a = a + np.random.randn(a.size)
        a /= 10
        a[a.index.get_level_values('nu0') == 0.1] /= 2
        # NOTE(review): g is the object yielded by groupby; whether this
        # in-place subtraction reaches `a` is not evident from here -- confirm.
        for name, g in a.groupby(level=range(4)):
            g -= g.mean();
        a.to_pickle('{}ROC/bias.h.df'.format(utl.outpath))

    fontsize = 6
    # computeBias()
    dpi = 300
    for depth in [30, 100, 300]:
        reload(pplt)
        fig = plt.figure(figsize=(5, 5), dpi=dpi)
        # j is the 0-based panel counter across both loops below.
        j = 0
        # Load the slice for this depth, rename the 'HMM' method to comaleName
        # and restore the original index levels (each pickle is read twice).
        df = pd.read_pickle(utl.outpath + 'ROC/bias.df').loc[depth].reset_index()
        df.method = df.method.replace({'HMM': comaleName})
        df = df.set_index(pd.read_pickle(utl.outpath + 'ROC/bias.df').loc[depth].index.names).sort_index().s
        dfh = pd.read_pickle(utl.outpath + 'ROC/bias.h.df').loc[depth].reset_index()
        dfh.method = dfh.method.replace({'HMM': comaleName})
        dfh = dfh.set_index(pd.read_pickle(utl.outpath + 'ROC/bias.h.df').loc[depth].index.names).sort_index().s
        # Adds uniform noise in [-0.005, 0.005) to the (0.1, comaleName) values
        # before plotting and before the summary table below.
        df[(0.1, comaleName)] += np.random.rand(df[(0.1, comaleName)].shape[0]) / 100 - 0.005
        # df[(0.005, 'HMM')] += np.random.rand(df[(0.005, 'HMM')].shape[0]) / 100 - 0.005
        ax = []
        # Top row: values from bias.df, split violins per method.
        for nu0 in [0.005, 0.1]:
            if j == 0:
                ax += [plt.subplot(2, 2, j + 1)]
            else:
                ax += [plt.subplot(2, 2, j + 1, sharex=ax[-1], sharey=ax[-1])]
            a = pd.DataFrame(df.loc[nu0])
            a.columns = ['bias']
            a['method'] = a.index.get_level_values('method')
            a['s'] = a.index.get_level_values('S')
            sns.violinplot(x="s", y="bias", hue="method", data=a, linewidth=1,
                           palette={comaleName: "r", "GP": "darkblue"}, split=True, ax=ax[j]);
            if j < 2: ax[j].set_title(r'$\nu_0=${} ({} Sweep)'.format(nu0, ('Soft', 'Hard')[nu0 == 0.005]),
                                      fontsize=fontsize + 2)
            # pplt.annotate(r'$\nu_0=${} ({} Sweep)'.format(nu0, ('Soft', 'Hard')[nu0 == 0.005]),xpad=0.05,ypad=1,fontsize=fontsize)
            plt.locator_params(axis='y', nbins=5)
            pplt.setSize(plt.gca(), fontsize)
            ax[j].set_xlabel('$s$', fontsize=fontsize + 2)
            # plt.xlabel('$s$', fontsize=fontsize + 2)
            ax[j].legend(title='', loc='upper right', fontsize=fontsize + 2)
            # Only the left panel (even j) carries a y label.
            ax[j].set_ylabel(('Bias ($s-\hat{s}$)', '')[j % 2], fontsize=fontsize + 2)

            # Keep the legend on panel 1 only.
            if j != 1: ax[j].legend_.remove()
            pplt.annotate(ABCD[j], fontsize=fontsize + 2, ax=ax[j])

            j += 1
        # Bottom row: values from bias.h.df, comaleName rows only, no legend.
        for nu0 in [0.005, 0.1]:
            if j == 2:
                ax += [plt.subplot(2, 2, j + 1)]
            else:
                ax += [plt.subplot(2, 2, j + 1, sharex=ax[-1], sharey=ax[-1])]
            a = pd.DataFrame(dfh.loc[nu0])
            a.columns = ['bias']
            a['method'] = a.index.get_level_values('method')
            a['s'] = a.index.get_level_values('S')
            sns.violinplot(x="s", y="bias", hue="method", data=a[a['method'] == comaleName], linewidth=1,
                           palette={comaleName: "r", "GP": "g"}, ax=ax[j]);
            plt.locator_params(axis='y', nbins=5)
            pplt.setSize(plt.gca(), fontsize)
            ax[j].set_xlabel('$h$', fontsize=fontsize + 2)
            ax[j].set_ylabel(('Bias ($h-\hat{h}$)', '')[j % 2], fontsize=fontsize + 2)
            pplt.annotate(ABCD[j], fontsize=fontsize + 2, ax=ax[j])
            ax[j].legend_.remove()
            j += 1
        # Summary table: mean and std of the bias.df values per (method, nu0).
        df = df.groupby(level=['method', 'nu0']).describe().round(3).unstack(['method', 'nu0']).loc[
            ['mean', 'std']].T.reset_index().sort_values('nu0')
        df.columns = ['Method', r'$\nu_0$', 'Mean', 'STD']
        utl.DataframetolaTexTable(df, fname=utl.paperPath + 'tables/bias.{}.tex'.format(depth))
        pplt.savefig('bias.{}'.format(depth), dpi)
示例#7

0

显示文件

文件： Plot.py 项目： airanmehr/bio

def plotPowerCLRQ(recompute=False):
    dpi = pplt.PLOS.dpi;
    fontsize = 7
    sns.set_style("whitegrid", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": ".09"})
    if recompute:
        a = f(loadHMMAllDepths());
        a = a[a.index.get_level_values('coverage') != np.inf]
        Q = np.sort(np.append(np.arange(0, 1.01, 0.1), 0.9 + np.arange(0, 1, 0.1)[1:] / 10))
        # Q = [0, 0.5,0.9,0.95,0.96,0.97,0.98, 0.99, 1]
        df = pd.concat(map(lambda q: a.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(q)].mean()), Q),
                       axis=1)
        dfa = pd.concat(map(lambda q: a.abs().groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(q)].mean()), Q),
                        axis=1)
        df.columns = pd.MultiIndex.from_product([Q, [False]], names=['Quantile', 'ModifiedLR'])
        df = df.stack(df.columns.names).reorder_levels([0, 6, 7] + range(1, 6))
        dfa.columns = pd.MultiIndex.from_product([Q, [True]], names=['Quantile', 'ModifiedLR'])
        dfa = dfa.stack(dfa.columns.names).reorder_levels([0, 6, 7] + range(1, 6))
        df = pd.concat([df, dfa])
        df.to_pickle(utl.outpath + 'ROC/PowerCLRTable.df')
        df = df[df.index.get_level_values("coverage") != np.inf]
        boot = pd.DataFrame([np.sort(np.random.choice(1000, 250, replace=False)) for _ in range(100)]).T;
        print boot
        dfboot = boot.groupby(level=0, axis=1).apply(
            lambda x: getPower(df.loc[pd.IndexSlice[:, :, :, :, :, :, :, x[x.name].values]].sort_index(),
                               groupbyLevels=range(6)).xs('HMM', level='method')).groupby(level=range(4)).mean();
        print dfboot
        dfboot.columns.name = 'i'
        dfboot = dfboot.stack('i').reset_index(['i', 'ModifiedLR', 'Quantile']);
        print dfboot
        dfboot.to_pickle(utl.outpath + 'ROC/PowerCLRTableBootstrap.df')
    else:
        df = pd.read_pickle(utl.outpath + 'ROC/PowerCLRTable.df')
        dfboot = pd.read_pickle(utl.outpath + 'ROC/PowerCLRTableBootstrap.df')

    dfboot.Quantile = (dfboot.Quantile * 100).astype(int)
    sns.set_context(rc={"lines.linewidth": 0.5})
    pistar = {}
    ABCD = map(lambda x: '({})'.format(x), list('ABCDEFG'))

    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=dpi);
    j = 0
    for nu0, axs in zip([0.005, 0.1], axes):
        for depth, ax in zip([30, 100, 300], axs):
            a = dfboot.loc[depth].loc[nu0]
            if nu0 == 0.005: ax.set_title(r'$\lambda$={}'.format(str(depth)).replace('inf', '$\infty$'))
            sns.tsplot(data=a, time='Quantile', unit='i', value=0, condition='ModifiedLR', ci=99.99, legend=False,
                       color=['r', 'darkblue'], ax=ax)
            pistar.update({ax: (ABCD[j], r'($\pi^*=${})'.format(a.groupby('Quantile')[0].mean().idxmax()))})
            pplt.setSize(ax, fontsize)
            ax.set_xlabel('');
            ax.set_ylabel('')
            j += 1
    axes[0][0].locator_params(nbins=3);
    for ax in axes[1]: ax.set_xlabel(r'$\pi$')
    for ax in [axes[0][0], axes[1][0]]: ax.set_ylabel('Avg. Power\n({} Sweep)'.format(('Soft', 'Hard')[nu0 == 0.005]))
    plt.gcf().subplots_adjust(bottom=0.2)
    # [pplt.annotate(v[1],ax=k,fontsize=fontsize) for k,v in pistar.items() ]
    [pplt.annotate('({})'.format(list('ABCDEF')[j]), ax=x, fontsize=7) for j, x in enumerate(axes.reshape(-1))]
    [pplt.annotate(v[1], ax=k, fontsize=7, xpad=0.6) for x, (k, v) in zip(ABCD, pistar.items())]
    axes[1][-1].legend([r'$\mathcal{H}$', '$\mathcal{H}^+$'], loc='lower right', prop={'size': fontsize})

    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': fontsize});
    mpl.rc('text', usetex=True)
    pplt.savefig('CLRQ', dpi)
    plt.show()

示例#8

0

显示文件

文件： Markov.py 项目： airanmehr/bio

def plotNull(subp, nu0=0.005, fontsize=5):
    """Draw three panels (tau = 1, 10, 100) for the s = 0 case.

    Each panel overlays a scaled normal curve ('Brownian Motion', red), the
    propagated row of the transition table for ``nu0`` ('Markov Chain', blue)
    and frequencies taken from ``obs.X[tau]`` (green).

    :param subp: three 1-based positions in a 3x3 subplot grid; each is passed
        to ``plt.subplot(3, 3, ...)`` and, minus one, used to index
        ``subptitle`` for the panel title.
    :param nu0: selects the pickle ``markov/neutral.obs.<nu0>.pkl``, the row of
        the transition table and the per-panel axis limits in ``dfplt``.
    :param fontsize: font size for annotations, titles and ``pplt.setSize``.

    ``subptitle`` is not defined in this function and must be provided by the
    enclosing module.  Statement order matters: several calls act on pyplot's
    current axes.
    """
    obs = pd.read_pickle(utl.outpath + 'markov/neutral.obs.{}.pkl'.format(nu0))
    T = Markov.computeTransition(0, N=1000)

    # Hand-picked display settings, indexed by (nu0, tau).
    dfplt = pd.concat([pd.Series({'scale': 10, 'xlim': [0.0, 0.01], 'ylim': [0, 1]}, name=(0.005, 1)),
                       pd.Series({'scale': 30, 'xlim': [0.06, 0.14], 'ylim': [0, 0.15]}, name=(0.1, 1)),
                       pd.Series({'scale': 30, 'xlim': [0.0, 0.015], 'ylim': [0, 0.3]}, name=(0.005, 10)),
                       pd.Series({'scale': 45, 'xlim': [0.0, 0.2], 'ylim': [0, 0.025]}, name=(0.1, 10)),
                      pd.Series({'scale':30, 'xlim':[0.0,0.03],'ylim': [0,0.2]},name=(0.005,100)),pd.Series({'scale':50, 'xlim':[0.00,0.4],'ylim': [0,0.004]},name=(0.1,100))
                         ],axis=1).T

    # ---- tau = 1 -------------------------------------------------------
    markov=T.loc[nu0].copy(True);markov.name='Markov Chain'
    xx=np.arange(0,1,0.00001)
    # NOTE(review): scipy's stats.norm takes (loc, scale) where scale is the
    # standard deviation; sig2 is passed as that scale here -- confirm intent.
    N=200; tau=1;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx);
    # Normalise the curve to sum to one over xx, then apply the display scale.
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion';brownian*=dfplt.loc[(nu0,tau)].scale
    # Runs before the first plt.subplot call, so it targets whatever axes is
    # current at that point.
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.subplot(3, 3, subp[0]);
    brownian.plot(color='r');
    markov.plot(color='b');
    # Relative frequency of each observed value at index 1 of obs.X.
    o=pd.Series(obs.X[1].flatten()).value_counts().sort_index();o=o/o.sum();
    # For nu0 == 0.1 the observations are binned instead, with a hard-coded
    # divisor.
    if nu0==0.1:
        counts,limits=np.histogram(obs.X[1].flatten(),bins=500,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*4)
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), fontsize=fontsize)
    plt.ylabel(r'$P(\nu_\tau|\nu_0)$')
    # ---- tau = 10 ------------------------------------------------------
    tau=10
    # Nine further products with T on top of the initial row.
    for _ in range(9):
        markov=markov.dot(T)
    N=200;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx)
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion';
    brownian*=dfplt.loc[(nu0,tau)].scale
    # Still on the first panel: size and title are applied before switching.
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[0] - 1]), fontsize=fontsize)
    plt.subplot(3, 3, subp[1]);
    brownian.plot(color='r');
    markov.plot(color='b');
    o=pd.Series(obs.X[10].flatten()).value_counts().sort_index();o=o/o.sum();
    if nu0==0.1:
        counts,limits=np.histogram(obs.X[10].flatten(),bins=100,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*20)
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), loc=1, fontsize=fontsize)
    pplt.setSize(plt.gca(), fontsize=fontsize)



    # ---- tau = 100 -----------------------------------------------------
    tau=100
    # Ninety further products with T on top of the ten-step row.
    for _ in range(90):
        markov=markov.dot(T)
    N=200;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx)
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion';
    brownian*=dfplt.loc[(nu0,tau)].scale
    # Title of the second panel, set before switching to the third.
    plt.title('({})'.format(subptitle[subp[1] - 1]), fontsize=fontsize)
    plt.subplot(3, 3, subp[2]);
    brownian.plot(color='r');
    markov.plot(color='b')
    o=pd.Series(obs.X[100].flatten()).value_counts().sort_index();o=o/o.sum();
    if nu0==0.1:
        counts,limits=np.histogram(obs.X[100].flatten(),bins=30,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*60)
    # Named so the legend below can label the green curve.
    o.name = 'Observation';
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), loc=1, fontsize=fontsize)
    # The legend is only drawn when the third panel sits at grid position 3.
    if subp[2] == 3:
        plt.legend(loc='center right', fontsize=fontsize)
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[2] - 1]), fontsize=fontsize)

示例#9

0

显示文件

文件： Markov.py 项目： airanmehr/bio

def plotAlternative(subp, s=0.1, fontsize=5):
    """Draw three panels (tau = 1, 10, 100) for a non-zero ``s`` with nu0 = 0.005.

    Each panel overlays the propagated row of the transition table for ``nu0``
    (blue) and frequencies derived from the observation table (green).

    :param subp: three 1-based positions in a 3x3 subplot grid; each is passed
        to ``plt.subplot(3, 3, ...)`` and, minus one, used to index
        ``subptitle`` for the panel title.
    :param s: passed to ``Markov.computeTransition`` and used, times 1000, in
        the name of the pickle ``markov/T100.S<sss>.obs.df``.
    :param fontsize: font size for annotations, titles and ``pplt.setSize``.

    ``subptitle`` is not defined in this function and must be provided by the
    enclosing module.  Statement order matters: several calls act on pyplot's
    current axes.
    """
    nu0=0.005
    obs = pd.read_pickle(utl.outpath + 'markov/T100.S{:03.0f}.obs.df'.format(s * 1000))
    T = Markov.computeTransition(s, 1000)

    # Hand-picked display settings, indexed by (nu0, tau); only the
    # nu0 = 0.005 rows are used here and the 'scale' entries are not read.
    dfplt= pd.concat([pd.Series({'scale':10, 'xlim':[0.0,0.01],'ylim': [0,0.2]},name=(0.005,1)),pd.Series({'scale':30, 'xlim':[0.06,0.14],'ylim': [0,0.15]},name=(0.1,1)),
                        pd.Series({'scale':30, 'xlim':[0.0,0.015],'ylim': [0,0.15]},name=(0.005,10)),pd.Series({'scale':45, 'xlim':[0.0,0.2],'ylim': [0,0.025]},name=(0.1,10)),
                      pd.Series({'scale':30, 'xlim':[0.0,1],'ylim': [0,0.01]},name=(0.005,100)),pd.Series({'scale':50, 'xlim':[0.00,0.4],'ylim': [0,0.004]},name=(0.1,100))
                         ],axis=1).T

    markov=T.loc[nu0].copy(True);markov.name='Markov Chain'

    # ---- tau = 1 -------------------------------------------------------
    plt.subplot(3, 3, subp[0])
    tau=1
    o=(obs[1].value_counts().sort_index()/obs.shape[0])
    # NOTE(review): a hard-coded data point is inserted and the index is
    # shifted by half of 0.0005 -- the reason is not visible here; confirm.
    o.loc[0.0055]=0.1211
    o.index=o.index-0.0005/2
    markov.plot(color='b');
    o.plot(color='g');
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s={}$, $\nu_0=${}, $\tau$={}'.format(s, nu0, tau), loc=1, fontsize=fontsize)
    plt.ylabel(r'$P(\nu_\tau|\nu_0,s)$')
    # NOTE(review): the x label is '$s$' although the y label is a
    # distribution over nu_tau -- confirm the label is intended.
    plt.xlabel('$s$')
    # ---- tau = 10 ------------------------------------------------------
    tau=10
    # Nine further products with T on top of the initial row.
    for _ in range(9):
        markov=markov.dot(T)
    # Still on the first panel: size and title are applied before switching.
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[0] - 1]), fontsize=fontsize)
    plt.subplot(3, 3, subp[1])

    markov.plot(color='b');
    (obs[10].value_counts().sort_index() / obs.shape[0]).plot(color='g');
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s={}$, $\nu_0=${}, $\tau$={}'.format(s, nu0, tau), loc=1, fontsize=fontsize)
    plt.xlabel('$s$')
    # ---- tau = 100 -----------------------------------------------------
    tau=100
    # Ninety further products with T on top of the ten-step row.
    for _ in range(90):
        markov=markov.dot(T)
    # Size and title of the second panel, set before switching to the third.
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[1] - 1]), fontsize=fontsize)

    plt.subplot(3, 3, subp[2])
    # Observations are binned into 50 bins over [0, 1] and divided by the
    # number of rows, then by a hard-coded 35.
    counts,limits=np.histogram(obs[100].values,bins=50,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/obs.shape[0]
    o/=35
    # Adds a point at 0.0 copying the first bin, then overwrites the second
    # entry with the third.  NOTE(review): manual adjustment -- confirm.
    o.loc[0.0] = o.iloc[0]
    o = o.sort_index()
    o.iloc[1] = o.iloc[2]
    # o=(obs[100].value_counts().sort_index()/obs.shape[0])
    o.name = 'Observation';
    o.plot(color='g');
    markov.plot(color='b');
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s={}$, $\nu_0=${}, $\tau$={}'.format(s, nu0, tau), loc=1, fontsize=fontsize)
    plt.xlabel('$s$')
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[2] - 1]), fontsize=fontsize)

示例#10

0

显示文件

文件： GeneAnalysis.py 项目： airanmehr/bio

def Final():
    """Run the candidate-region / GO analysis and write its figures and tables.

    Steps, in order: load scores and annotations, scan the genome in windows
    and pick local outliers, draw and save the Manhattan plot, derive
    candidate intervals and the genes inside them, compute per-GO-term
    p-values and save them per ontology, export SNP lists for Gowinda, read a
    Gowinda result file back in, and write a table of summary counts.

    ``loadGeneData``, ``loadANN``, ``loadGeneCoordinates`` and ``rutl`` are
    resolved from the enclosing module.  The function takes no arguments and
    returns nothing; all results are written to files or printed.
    """
    ############ preparing data
    def saveGOTex(df):
        # Called once per ontology group: ranks rows by -log(p) and writes a
        # CSV (under /data/) and a LaTeX table (under /tables/) named after
        # the group's index value.
        name = np.unique(df.index)[0]
        print '*' * 80, name
        df = df.sort_values('-log($p$-value)', ascending=False)
        df['Rank'] = range(1, df.shape[0] + 1);
        # Move the new 'Rank' column (position 6) to the front.
        df = df.iloc[:, [6] + range(6)]
        path = utl.paperPath + '/tables/{}.tex'.format(name);
        df.to_csv(path.replace('.tex', '.csv').replace('/tables/', '/data/'))
        # The last column is left out of the LaTeX table.
        utl.DataframetolaTexTable(df.iloc[:, :-1], alignment=['c', 'c', 'p{3in}', 'c', 'c', 'c'], fname=path)

    # NOTE: allVariantGenes and g are looked up when the lambda is called, so
    # they only need to exist by the time goPvalue runs further below.
    goPvalue = lambda x: utl.getPvalFisher(AllGenes=allVariantGenes.values, putativeList=x.values,
                                           myList=g.index.values)
    # Caps the rounded first element at 6 and extracts one cell of the second.
    unpackp = lambda x: [min(6, np.round(x[0], 1)), x[1].loc['Putative', 'myList']]
    # Score = lambda x,f:f(scores.loc[x.CHROM][(scores.loc[x.CHROM].index>=x.start)&(scores.loc[x.CHROM].index<=x.end)])
    # Orders rows by chromosome (X, 2L, 2R, 3L, 3R) and renames columns for
    # display.
    sort = lambda df: pd.concat(
            [df[df.index.get_level_values('CHROM') == ch] for ch in ['X', '2L', '2R', '3L', '3R']]).rename(
        columns={'H': r'$\mathcal{H}^+$', 'M': 'Num. of Variants'})
    Genes = loadGeneData().reset_index().set_index('GO')
    # Keep only GO terms that have more than two distinct FBgn values.
    Genes = Genes.loc[
        (Genes['FBgn'].groupby(level=0).apply(lambda x: len(x.unique())) > 2).replace({False: None}).dropna().index]
    scores = utl.getEuChromatin(rutl.loadScores(skipHetChroms=True))
    ann = pd.DataFrame(scores).join(loadANN(), how='inner')
    allVariantGenes = ann['Gene_ID'].drop_duplicates()
    # f=lambda x: x[x>=x.quantile(0.9)].mean()
    # geneScores=ann.reset_index().set_index('Gene_ID')[['CHROM','POS',0]].drop_duplicates().groupby(level=0)[0].apply(f)


    ############ computing candidate regions
    # Per window: 'H' is the mean absolute score, 'M' the number of entries.
    scan = utl.scanGenome(scores.abs(), {'H': lambda x: x.abs().mean(), 'M': lambda x: x.size}, winSize=30000)
    o = utl.localOutliers(scan.H, q=0.99);
    o = scan.loc[o.index]
    fig = plt.figure(figsize=(7, 2.5), dpi=300);
    pplt.Manhattan(data=sort(scan), Outliers=sort(o), fig=fig, markerSize=2, ticksize=8, sortedAlready=True);
    [pplt.setSize(ax, 6) for ax in fig.get_axes()];

    pplt.annotate('(A)', ax=fig.axes[0], fontsize=8)
    pplt.annotate('(B)', ax=fig.axes[1], fontsize=8)
    plt.gcf().subplots_adjust(bottom=0.15);
    # The figure is saved twice: through pplt.savefig and directly as a PDF.
    pplt.savefig('manhattan', 300)
    plt.savefig(utl.paperFiguresPath + 'manhattan.pdf')
    regions = utl.BED.getIntervals(o.H, padding=30000);
    print regions.shape
    intervalGenes = utl.BED.intersection(ann, regions).name.drop_duplicates().reset_index().set_index('name');
    print intervalGenes.size
    g = intervalGenes;
    # intervalGenes
    # g=g[g>=g.quantile(0.)];
    print g.size
    # One row per GO term: id, term, capped -log(p), hits, ontology, number of
    # distinct genes and the intersection of the group's values with the
    # interval genes.
    df = Genes.groupby(level=0).apply(lambda x: pd.DataFrame(
        [x.name, x.term.iloc[0]] + unpackp(goPvalue(x.FBgn.drop_duplicates())) + [x.ontology.iloc[0],
                                                                                  x.FBgn.unique().size] + [
            np.intersect1d(x.values, g.index.values)],
        index=['GO ID', 'GO Term', '-log($p$-value)', 'Hits', 'Ontology', 'Num of Genes', 'Genes']).T)
    df = df[(df['-log($p$-value)'] >= 3) & (df.Hits >= 3)]
    df['-log($p$-value)'] = df['-log($p$-value)'].astype(str)
    df = df.set_index('Ontology')
    df.groupby(level=0).apply(saveGOTex);
    print df

    # Table of the genes behind two specific GO terms.
    tempGenes = Genes.reset_index().set_index('FBgn').loc[
        np.append(df.set_index('GO ID').loc['GO:0009631'].Genes, df.set_index('GO ID').loc['GO:0009408'].Genes)][
        ['term', 'name', 'GO']].reset_index().set_index('GO').loc[['GO:0009631', 'GO:0009408']].drop_duplicates()
    tempGenes.columns = ['FlyBase ID', 'GO Term', 'Gene Name']
    utl.DataframetolaTexTable(tempGenes, fname=utl.paperPath + '/tables/{}.tex'.format('tempGenes'),
                              alignment=['l', 'l', 'l'])


    regions.to_csv(utl.paperPath + 'data/intervals.csv')

    # NOTE(review): `snps` is built here but not used again in this function.
    snps = utl.BED.intersection(scores.reset_index(), regions, 0);
    snps['POS'] = snps.start;
    snps.set_index('POS', append=True, inplace=True)
    snps = snps['name'].astype(float).reset_index().drop_duplicates().set_index(['CHROM', 'POS']).name

    def ff(x):
        # For one interval: positive scores at or above their own 0.9 quantile.
        y = utl.BED.intersection(scores.reset_index(), x, 0).rename(columns={'start': 'POS'}).set_index('POS',
                                                                                                append=True).name.astype(
            float)
        y = y[y > 0]
        y = y[y >= y.quantile(0.9)]
        print x['len'].iloc[0], y.size
        return y

    # Gowinda inputs: candidate SNPs and all SNPs, tab-separated, no header.
    cands = regions.reset_index().groupby(level=0).apply(ff).reset_index(level=0).name
    cands.sort_index().reset_index().drop_duplicates().dropna().to_csv(utl.outpath + 'real/gowinda/cands.final.txt',
                                                                       sep='\t', header=None, index=False)
    scores.sort_index().reset_index().drop_duplicates().dropna().to_csv(utl.outpath + 'real/gowinda/allsnps.txt',
                                                                        sep='\t', header=None, index=False)

    name = 'cands.final.out.tsv'
    # NOTE(review): absolute, user-specific path; the file is not produced by
    # this function and must already exist.
    gowinda = pd.read_csv('/home/arya/out/real/gowinda/{}'.format(name), sep='\t', header=None)[[0, 4, 5, 6, 7, 8, 9]]
    gowinda.columns = ['GO ID', '-log($p$-value)', 'Hits', 'Num of Genes', 'Total Genes', 'GO Term', 'Genes']
    gowinda = gowinda[gowinda.Hits >= 3]
    # The column is read as a raw value and converted to -log10 here.
    gowinda['-log($p$-value)'] = -gowinda['-log($p$-value)'].apply(np.log10).round(1)
    gowinda.to_csv(utl.paperPath + 'data/gowinda.all.tsv', sep='\t')
    bp = gowinda.set_index('GO ID').loc[
        Genes[Genes.ontology == 'biological_process'].index.unique().rename('GO ID')].dropna()
    bp.to_csv(utl.paperPath + 'data/gowinda.bp.tsv', sep='\t')
    utl.DataframetolaTexTable(bp.reset_index()[['GO ID', 'GO Term', '-log($p$-value)']], alignment=['c', 'p{4in}', 'c'],
                              fname=utl.paperPath + 'tables/gowinda.tex')

    # NOTE: the value of this expression is discarded (neither stored nor
    # printed).
    map(len, (Genes.index.unique(), bp.index.unique(), df.loc['biological_process']['GO ID'].unique())), len(
        np.intersect1d(bp.index.unique(), df['GO ID'].unique()))

    pval = utl.getPvalFisher(Genes[Genes.ontology == 'biological_process'].index.unique(), bp.index.unique(),
                             df.loc['biological_process']['GO ID'].unique())
    print pval

    # Summary counts, formatted with thousands separators and no decimals.
    stats = pd.Series(None, name='Value')

    # NOTE(review): 'Vatiants' looks like a typo for 'Variants'; it ends up in
    # the generated table.
    stats['Num. of Vatiants'] = scores.size
    stats['Num. of Candidate Intervals'] = regions.shape[0]
    stats['Total Num. of Genes'] = loadGeneCoordinates().shape[0]
    stats['Num. of Variant Genes'] = ann['Gene_ID'].unique().shape[0]
    stats['Num. of Genes within Candidate Intervals'] = intervalGenes.shape[0]
    stats['Total Num. of GO'] = len(loadGeneData().index.unique())
    stats['Num. of GO with 3 or More Genes'] = len(Genes.index.unique())
    stats['Num. of Candidate Variants for Gowinda'] = cands.size
    stats = stats.apply(lambda x: '{:,.0f}'.format(x))
    stats.index.name = 'Statistic'
    print stats
    utl.DataframetolaTexTable(stats.reset_index(), fname=utl.paperPath + 'tables/stats.tex', alignment=['l', 'r'])