def plotBottleneck(maxGen=None, obs=False, mean=True, color='blue'):
    """Plot mean +/- 2 std bands of three statistics and save the figure as 'bottleneck'.

    The data frame is read from ``path + 'nu0.005.s0.0.df'`` and its
    'tajimaD', 'HAF' and 'SFSelect' columns are drawn on three stacked axes.

    NOTE: the very first statement is ``exit()``, so as written nothing below
    it runs.  ``maxGen``, ``obs`` and ``mean`` are not read anywhere in the
    body; ``color`` is used for both the mean line and the shaded band.
    """
    exit()

    def plotOne(df, ax, method):
        # Row-wise mean and standard deviation (axis=1).
        center = df.mean(1)
        spread = df.std(1)
        # plt.locator_params(nbins=4);
        center.plot(ax=ax, legend=False, linewidth=3, color=color)
        x = center.index.values
        mid = center.values
        band = 2 * spread.values
        ax.fill_between(x, mid - band, mid + band, color=color, alpha=0.3)
        ax.set_ylabel(method.strip())
        # Pin the lower y-limit to -0.1, keep whatever upper limit was chosen.
        ax.set_ylim([-0.1, ax.get_ylim()[1]])
        pplt.setSize(ax)

    dfn = pd.read_pickle(path + 'nu{}.s{}.df'.format(0.005, 0.0))
    fig, ax = plt.subplots(3, 1, sharex=True, figsize=(4, 3), dpi=300)
    panels = (
        ('tajimaD', "Tajima's $D$"),
        ('HAF', "Fay Wu's $H$"),
        ('SFSelect', 'SFSelect'),
    )
    for axis, (column, label) in zip(ax, panels):
        plotOne(dfn[column], axis, label)
        plt.xlabel('Generations')
    plt.gcf().subplots_adjust(bottom=0.25)
    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
    mpl.rc('text', usetex=True)
    pplt.savefig('bottleneck', 300)
    plt.show()
def plotLD3d():
    """Draw two stacked 3-D panels with ``plotLDDecaySelection3d`` and save them as 'LDDecay3d'.

    The lower panel is produced by passing ``True`` as the second positional
    argument of ``plotLDDecaySelection3d``.
    """
    canvas = plt.figure(figsize=(7, 6), dpi=300)
    for row, extra_args in ((1, ()), (2, (True,))):
        panel = canvas.add_subplot(2, 1, row, projection='3d')
        plotLDDecaySelection3d(panel, *extra_args)
    pplt.savefig('LDDecay3d', 200)
def plotPowerCLR(recompute=False):
    """Plot the power of the 'HMM' and 'MarkovChain' scores and save it as 'powerCLR'.

    Args:
        recompute: when true, rebuild the power table from the pickled
            'MarkovChain' and 'HMM' results under ``utl.outpath + 'ROC/'`` and
            overwrite 'ROC/PowerCLR.df'; otherwise load that cached table.

    Relies on the module-level names ``f``, ``Qcoverage``, ``getPower`` and
    ``dpi``, none of which is defined inside this function.
    """
    cache = utl.outpath + 'ROC/PowerCLR.df'
    if not recompute:
        df = pd.read_pickle(cache)
    else:
        mc = pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'MarkovChain'))
        hmm = f(pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'HMM')))
        a = pd.concat([mc, hmm])
        print(a)
        # Drop the infinite-coverage rows.
        a = a[a.index.get_level_values('coverage') != np.inf]

        def upper_tail_mean(x):
            # Mean of the values at or above a quantile looked up in
            # Qcoverage by the first level of the group key.
            return x[x >= x.quantile(Qcoverage[x.name[0]])].mean()

        df = pd.DataFrame(a.groupby(level=range(6)).apply(upper_tail_mean))[0]
        # df = pd.DataFrame(a.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(0.99)].mean()))
        df = getPower(df, groupbyLevels=range(4))
        df.to_pickle(cache)
    reload(pplt)
    info = pplt.getNameColorMarker(df)
    method = info.index.get_level_values('method')
    # Override marker first, then colour, for the two methods.
    for column, hmm_value, mc_value in (('marker', '--o', '--s'),
                                        ('color', 'r', 'darkblue')):
        info.loc[method == 'HMM', column] = hmm_value
        info.loc[method == 'MarkovChain', column] = mc_value
    # info.loc[info.index.get_level_values('q')==0.99,'color']='r'
    # info.loc[info.index.get_level_values('q')==1,'color']='darkblue'
    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=dpi)
    pplt.setStyle(lw=1)
    # Top row: nu0 = 0.005 ('Hard'); bottom row: nu0 = 0.1 ('Soft').
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard')
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft')
    for j, panel in enumerate(axes.reshape(-1)):
        pplt.annotate('({})'.format(list('ABCDEF')[j]), ax=panel, fontsize=7)
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('powerCLR', dpi=dpi)
    plt.show()
def outlier():
    """Flag outlier windows three ways, pickle each set and save a Manhattan plot for each.

    The windowed statistic is the first column of the genome scan of the
    absolute scores.  Outliers are taken (1) above the overall 0.99 quantile,
    (2) above the 0.99 quantile within each first-level index group, and
    (3) by ``localOutliers``.  Results go to
    ``utl.outpath + 'real/outliers.{global,chrom,local}.df'`` and the figures
    to ``utl.paperPath + 'new/{global,chrom,local}.pdf'``.
    """

    def save_manhattan(series, flagged, tag):
        # One figure per outlier definition; identical layout for all three.
        fig = plt.figure(figsize=(7, 1.5), dpi=300)
        pplt.Manhattan(data=series, Outliers=pd.DataFrame(flagged), fig=fig, markerSize=2,
                       ticksize=8, sortedAlready=True)
        for axis in fig.get_axes():
            pplt.setSize(axis, 5)
        plt.gcf().subplots_adjust(bottom=0.15)
        plt.savefig(utl.paperPath + 'new/{}.pdf'.format(tag))

    scores = rutl.removeHeteroChromatin(rutl.loadScores())
    field = comale
    reducers = {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}
    df = sort(utl.scanGenome(scores.abs(), reducers))[[field, 'Num. of SNPs']]
    a = df.iloc[:, 0]

    a = a.rename('Global Outliers')
    o = a[a > a.quantile(0.99)]
    o.to_pickle(utl.outpath + 'real/outliers.global.df')
    save_manhattan(a, o, 'global')

    a = a.rename('Chrom Outliers')
    o = a.groupby(level=0).apply(lambda x: x[x > x.quantile(0.99)].loc[x.name])
    o.to_pickle(utl.outpath + 'real/outliers.chrom.df')
    save_manhattan(a, o, 'chrom')

    a = a.rename('Local Outliers')
    o = localOutliers(a)
    o.to_pickle(utl.outpath + 'real/outliers.local.df')
    save_manhattan(a, o, 'local')
def plotOne(df, ax, method):
    """Draw the row-wise mean of ``df`` on ``ax`` with a +/- 2 std shaded band.

    Args:
        df: frame whose rows are reduced with ``mean(1)`` and ``std(1)``.
        ax: matplotlib axes to draw on.
        method: y-axis label; surrounding whitespace is stripped.

    NOTE(review): ``color`` is not a parameter here, so it is resolved from
    the enclosing/module scope at call time - confirm it is defined there.
    """
    center = df.mean(1)
    spread = df.std(1)
    # plt.locator_params(nbins=4);
    center.plot(ax=ax, legend=False, linewidth=3, color=color)
    x = center.index.values
    mid = center.values
    band = 2 * spread.values
    ax.fill_between(x, mid - band, mid + band, color=color, alpha=0.3)
    ax.set_ylabel(method.strip())
    # Pin the lower y-limit to -0.1, keep the current upper limit.
    ax.set_ylim([-0.1, ax.get_ylim()[1]])
    pplt.setSize(ax)
def plotScalingFactor():
    """Save one two-panel figure per (nu0, s) pair as 'decayFactors0' .. 'decayFactors3'.

    Upper panel: ``nu(t, s, nu0)``.  Lower panel: ``log(H(t)/H(t[0]))``
    ('log(Growth)'), ``-2*r*t*l`` ('log(Decay)') and their sum, with a dashed
    vertical line where the absolute sum (first time point excluded) is
    smallest.
    """
    r = 2 * 1e-8
    l = 5e4
    dpi = 300
    j = 0
    legend_labels = ['log(Growth)', r'log(Decay)', 'log(Growth) + log(Decay)']
    for nu0 in [0.005, 0.1]:
        sweep = 'Hard' if nu0 == 0.005 else 'Soft'
        for s in [0.025, 0.1]:
            t = np.arange(0, 2 * (utl.logit(0.995) - utl.logit(nu0)) / s + 1., 1)
            fig, ax = plt.subplots(2, 1, figsize=(5.5, 2.5), dpi=dpi, sharex=True)
            upper, lower = ax[0], ax[1]

            # Upper panel.
            nu(t, s=s, nu0=nu0).plot(color='k', legend=False, ax=upper)
            pplt.annotate(r'$s$={}, $\nu_0=${} ({} Sweep)'.format(s, nu0, sweep),
                          fontsize=7, ax=upper)
            pplt.setSize(ax=upper, fontsize=6)
            upper.set_ylabel(r'$\nu_t$')
            #
            # Lower panel: the two log terms and their sum.
            H0 = H(t[0], s=s, nu0=nu0)
            Ht = H(t, s=s, nu0=nu0)
            df = pd.DataFrame([np.log(Ht / H0), -2 * r * t * l], columns=t,
                              index=['log(Growth)', r'log(Decay)']).T
            df['log(Growth) + log(Decay)'] = df.sum(1)
            df.plot(ax=lower, grid=True, linewidth=2)
            lower.set_xlabel('Generations')
            lower.set_ylabel('Log(Scaling Factor)')
            lower.axvline(df.iloc[1:, 2].abs().idxmin(), color='k', linestyle='--', linewidth=0.5)
            # if j != 3:
            #     ax[1].legend_.remove()
            # else:
            lower.legend(legend_labels, bbox_to_anchor=(1.45, .75), prop={'size': 6})
            pplt.setSize(lower, fontsize=6)
            # Leave the right 30% of the canvas free for the outside legend.
            plt.tight_layout(pad=0.1, rect=[0, 0, 0.7, 1])
            plt.gcf().subplots_adjust(bottom=0.15)
            pplt.savefig('decayFactors{}'.format(j), dpi=dpi)
            j += 1
def scanSFS():
    """Scan the genome with SFSelect at two time points and save the comparison as 'sfs-clear'.

    First plots the windowed absolute scores with their top-1% windows
    (file 'all').  Then runs ``scanOne`` with the SFSelect estimator on
    ``rutl.getNut(0)`` and ``rutl.getNut(59)``, combines the difference, both
    scans and the score scan into one frame, blanks negative differences and
    writes a Manhattan plot to ``utl.paperPath + 'new/sfs-clear.pdf'``.
    """
    scores = rutl.loadScores()
    field = comale
    reducers = {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}
    df = sort(utl.scanGenome(scores.abs(), reducers))[[field, 'Num. of SNPs']]
    plotOne(df, df[df[field] > df[field].quantile(0.99)], fname='all')

    nu0 = rutl.getNut(0)
    nut = rutl.getNut(59)
    reload(rutl)
    # n= int(pd.read_pickle(utl.outpath + 'real/CD.F59.df').loc[:,pd.IndexSlice[:,0,'D']].mean().mean())
    n = 100

    def SFSelect(x):
        return est.Estimate.getEstimate(x=x, method='SFSelect', n=n)

    sf0 = scanOne(nu0, SFSelect, 'SFSelect.Base', 'SFSelect.Base')
    sft = scanOne(nut, SFSelect, 'SFSelect.Final', 'SFSelect.Final')

    change = (sft.iloc[:, 0] - sf0.iloc[:, 0]).rename('SFS(59)-SFS(0)')
    sfr = pd.concat([change, sf0.iloc[:, 0], sft.iloc[:, 0], df.iloc[:, 0]], axis=1)
    # Outliers are picked before negative differences are blanked out.
    outlier = sfr[sfr.iloc[:, 0] > sfr.iloc[:, 0].quantile(0.99)]
    sfr.loc[(sfr.iloc[:, 0] < 0).values, sfr.columns[0]] = None

    fig = plt.figure(figsize=(7, 4.5), dpi=300)
    pplt.Manhattan(data=sfr, Outliers=outlier, fig=fig, markerSize=2, ticksize=8, sortedAlready=True)
    for axis in fig.get_axes():
        pplt.setSize(axis, 5)
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('sfs-clear'))
def plotOne(df, outlier, fname=None, dashedline=True):
    """Draw a Manhattan plot of ``df`` with ``outlier`` highlighted.

    Args:
        df: data passed to ``pplt.Manhattan``; its first column supplies the
            0.99 quantile for the optional dashed line.
        outlier: passed to ``pplt.Manhattan`` as ``Outliers``.
        fname: when not None, the figure is saved to
            ``utl.paperPath + 'new/<fname>.pdf'``.
        dashedline: when true, draw a dashed horizontal line at the 0.99
            quantile of the first column on the first axes.
    """
    fig = plt.figure(figsize=(7, 2.5), dpi=300)
    pplt.Manhattan(data=df, Outliers=outlier, fig=fig, markerSize=2, ticksize=8, sortedAlready=True)
    for axis in fig.get_axes():
        pplt.setSize(axis, 8)
    plt.gcf().subplots_adjust(bottom=0.2)
    if dashedline:
        threshold = df.iloc[:, 0].quantile(0.99)
        plt.gcf().axes[0].axhline(threshold, linewidth=0.5, linestyle='--', color='k')
    if fname is None:
        return
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format(fname))
def plotPower(recompute=False):
    """Plot power per method, save it as 'power', and export average-power tables.

    Args:
        recompute: when true, rebuild the power table from the pickled
            'FIT', 'CMH', 'GP' and HMM results and overwrite 'ROC/Power.df';
            otherwise load that cached table.

    Side effects: writes 'ROC/avgPower.df' under ``utl.outpath`` and two LaTeX
    tables ('powerHardMathods.tex', 'powerSoftMethods.tex') under
    ``utl.paperFiguresPath + '../tables/'``.
    """
    if recompute:
        def causal(x):
            idx = x.index
            return x[(idx.get_level_values('causal') == True) | (idx.get_level_values('label') == -1)]

        FIT = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT']
        # Missing FIT values are replaced by uniform random numbers.
        missing = FIT.isnull()
        FIT[missing] = np.random.rand(missing.sum())
        CMH = causal(pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH'].fillna(0))
        GP = causal(pd.read_pickle(utl.outpath + 'ROC/GP').LR)
        HMM = f(loadHMMAllDepths())
        # HMM = (HMM.alt - HMM.null) ;HMM = HMM.groupby(level=range(6)).mean()
        # HMM = HMM.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(0.99)].mean())
        HMM = HMM.groupby(level=range(6)).apply(
            lambda x: x[x >= x.quantile(Qcoverage[x.name[0]])].mean())
        GP = GP.groupby(level=range(6)).max()
        FIT = FIT.groupby(level=range(6)).max()  # dont move this line!
        CMH = CMH.groupby(level=range(6)).max()
        df = getPower(pd.concat([GP, HMM, FIT, CMH]), range(4)).sort_index()
        df.to_pickle(utl.outpath + 'ROC/Power.df')
    else:
        df = pd.read_pickle(utl.outpath + 'ROC/Power.df')

    # Drop the infinite-coverage rows before plotting.
    df = df[df.index.get_level_values('coverage') != np.inf]
    df = fixComaleName(df)
    info = fixColor(pplt.getNameColorMarker(df))
    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=pplt.PLOS.dpi)
    pplt.setStyle(lw=1)
    reload(pplt)
    # Top row: nu0 = 0.005 ('Hard'); bottom row: nu0 = 0.1 ('Soft').
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard',
                      panel=list('ABC'))
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft', panel=list('DEF'))
    for j, panel_ax in enumerate(axes.reshape(-1)):
        pplt.annotate('({})'.format(list('ABCDEF')[j]), ax=panel_ax, fontsize=7)
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('power', pplt.PLOS.dpi)

    # Average power over the first three index levels.
    df.groupby(level=range(3)).mean().unstack('method').to_pickle(utl.outpath + 'ROC/avgPower.df')
    csv = df.groupby(level=range(3)).mean().reset_index()
    # csv.replace({'HMM': comaleName}, inplace=True)
    csv.replace({np.inf: r'$\infty$'}, inplace=True)
    csv.nu0.replace({0.005: 'Hard', 0.1: 'Soft'}, inplace=True)
    lam = r'$\lambda$'
    csv.columns = [lam, 'Sweep', 'Method', 'Avg Power']
    csv.sort_values([lam, 'Sweep', 'Avg Power'], ascending=False, inplace=True)
    csv['Avg Power'] = csv['Avg Power'].round().astype(int)
    csv = csv.set_index(['Sweep'])
    # Cast every non-string lambda entry to int (string entries are left alone).
    i = csv[lam].apply(lambda x: not isinstance(x, str))
    csv.loc[i, lam] = csv.loc[i, lam].astype(int)
    soft = csv.loc['Soft'].sort_values([lam, 'Avg Power'], ascending=False)
    hard = csv.loc['Hard'].sort_values([lam, 'Avg Power'], ascending=False)
    utl.DataframetolaTexTable(hard, fname=utl.paperFiguresPath + '../tables/powerHardMathods.tex')
    utl.DataframetolaTexTable(soft, fname=utl.paperFiguresPath + '../tables/powerSoftMethods.tex')
    plt.show()
def Final():
    """Save a Manhattan plot with shaded intervals to 'new/CHROM.FDR_0.01.pdf'.

    NOTE(review): ``o`` and ``shades`` are read but never assigned in this
    function, so they must already exist at module scope when it is called -
    confirm.  (``intervals`` is computed here but ``shades`` is what the
    title reads.)
    """
    scores = rutl.loadScores(skipHetChroms=True).abs()
    reducers = {'H': lambda x: x.abs().mean(), 'M': lambda x: x.size}
    a = sort(utl.scanGenome(scores.abs(), reducers))
    intervals = ga.getIntervals(o.H, padding=30000)
    fig = plt.figure(figsize=(7, 1.5), dpi=300)
    pplt.Manhattan(data=a, Outliers=o, shade=intervals.reset_index(), fig=fig, markerSize=2,
                   ticksize=8, sortedAlready=True)
    for axis in fig.get_axes():
        pplt.setSize(axis, 5)
    plt.gcf().subplots_adjust(bottom=0.15)
    # Title is the tuple (row count, summed 'len' column divided by 1e6).
    title = (shades.shape[0], shades['len'].sum() / 1e6)
    plt.suptitle(title, fontsize=8)
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('CHROM.FDR_0.01'))
def plotSNPPval(out):
    """Manhattan-plot KDE-based p-values for the 1200 largest entries of ``out``.

    Args:
        out: series; sorted descending and truncated to its first 1200
            entries before being handed to ``utl.getPvalKDE`` together with a
            density built from ``rutl.loadScores()``.

    The sorted p-values are printed, and rows are re-ordered chromosome by
    chromosome in a fixed order before plotting.
    """
    scores = rutl.loadScores()
    kde = utl.getDensity(scores, width=1)
    top = out.sort_values(ascending=False).iloc[:1200]
    pval = utl.getPvalKDE(top, kde)
    print(pval.sort_values())
    pval[pval >= 3].size  # result is discarded
    df = pd.DataFrame(pval)
    chrom_order = ['X', '2L', '2R', '3L', '3R', '4', '2LHet', '2RHet', '3LHet', '3RHet', 'XHet']
    per_chrom = []
    for ch in chrom_order:
        per_chrom.append(df[df.index.get_level_values('CHROM') == ch])
    df = pd.concat(per_chrom)
    fig = plt.figure(figsize=(7, 2), dpi=300)
    pplt.Manhattan(df, fig=fig, markerSize=2, ticksize=8, sortedAlready=True)
    for axis in fig.get_axes():
        pplt.setSize(axis, 8)
def plotDepthHeterogenocity():
    """Plot read depth per replicate at four example rows and save it as 'depthHetero'.

    The rows are chosen by the row-wise standard deviation of the 'D' columns
    of 'real/CD.F59.df': the maximum, the 0.52 quantile, the median and the
    0.8 quantile.

    NOTE(review): the debug print calls a bare ``get_axis_limits()`` while the
    rest of the body uses ``pplt.get_axis_limits`` - confirm the bare name is
    in scope.
    """
    dpi = 300
    sns.set_style("whitegrid", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": ".09"})
    _, ax = plt.subplots(2, 2, sharex=True, figsize=(6, 4), dpi=dpi)
    d = pd.read_pickle(utl.outpath + 'real/CD.F59.df').xs('D', level='READ', axis=1)
    std = d.std(1)

    def rows_equal_to(value):
        # Index of the rows whose std equals `value` exactly.
        return (std == value).replace({False: None}).dropna().index

    loc = [std.idxmax(),
           rows_equal_to(std.quantile(0.52))[0],
           rows_equal_to(std.median())[-1],
           rows_equal_to(std.quantile(0.8))[0]]
    ax = ax.reshape(-1)
    fontsize = 6
    for i, pos in enumerate(loc):
        panel = ax[i]
        eg = d.loc[pos]
        for r in range(3):
            eg[r].dropna().plot(marker='o', ax=panel, markersize=5)
        plt.xticks(d.columns.get_level_values('GEN').unique())
        plt.xlabel('')
        plt.ylabel('')
        # Same text as the Python 2 statement `print a, b`.
        print('{} {}'.format('position={}:{}'.format(eg.name[0], eg.name[1]), get_axis_limits()))
        if i in [0, 2]:
            panel.set_ylabel('Read Depth')
        if i > 1:
            panel.set_xlabel('Generation')
        if i == 0:
            panel.legend(['Replicate 1', 'Replicate 2', 'Replicate 3'], loc='upper center',
                         prop={'size': fontsize})
        # Pad the y-range a little on both sides, never starting above zero.
        yrang = pplt.get_axis_limits(upper=True, ax=panel)[1] - pplt.get_axis_limits(upper=False, ax=panel)[1]
        panel.set_ylim([min(0, panel.get_ylim()[0] - 0.05 * yrang), panel.get_ylim()[1] + 0.03 * yrang])
        panel.set_xlim([-2, 61])
        panel.set_title('{}:{}'.format(eg.name[0], eg.name[1]))
        pplt.setSize(panel, fontsize)
    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
    mpl.rc('text', usetex=True)
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('depthHetero', dpi)
    plt.show()
def plot():
    """Plot 'markov', 'brownian' and 'observation' curves on a 4x3 grid and save as 'markovDists'.

    Rows iterate over ``itertools.product([0, 0.1], [0.005, 0.1])`` unpacked
    as ``(s, nu0)``; columns iterate over ``tau`` in 1, 10, 100.  The 'markov'
    curve comes from 'markov/simulations/plotData.df'; the observation and
    Brownian curves come from ``getObservation`` and ``getBrownian``.  The
    Brownian curve is dropped whenever ``s`` is non-zero.

    Relies on the module-level ``color`` mapping (indexed by series name).
    """
    fontsize = 5

    def plotOne(x, ax):
        """Draw one curve; unnamed series get a thin opaque line, ``None`` is skipped."""
        lw = 3
        alpha = 0.8
        try:
            if x.name is None:
                alpha = 1
                lw = 0.7
        except AttributeError:
            # Only a missing ``name`` attribute (e.g. ``x`` is None) is
            # expected here; anything else should surface instead of being
            # silently swallowed by a bare ``except``.
            pass
        if x is not None:
            x.plot(ax=ax, color=color[x.name], lw=lw, alpha=alpha)

    fig, axes = plt.subplots(4, 3, figsize=(7, 3.9), dpi=300)
    df = pd.read_pickle(utl.outpath + 'markov/simulations/plotData.df')
    # NOTE(review): the last row is lettered K, L, M (J is skipped) - confirm intended.
    ABC = [list('ABC'), list('DEF'), list('GHI'), list('KLM')]
    for (s, nu0), axr, titles in zip(itertools.product([0, 0.1], [0.005, 0.1]), axes, ABC):
        for tau, ax, title in zip([1, 10, 100], axr, titles):
            key = (nu0, s, tau)
            observation = getObservation(nu0, s, tau)
            x = observation.index.values
            brownian = getBrownian(x=x, nu0=nu0, tau=tau, mu=nu0)
            markov = df[key].loc['markov']
            df[key] = pd.Series([observation, markov, brownian],
                                index=['observation', 'markov', 'brownian']).rename(key)
            if s:
                df[key].loc['brownian'] = None
            df[key].loc[['markov', 'brownian', 'observation']].apply(lambda x: plotOne(x, ax))
            if nu0 == 0.005 and tau == 100:
                ax.set_xlim([0, 0.02])
            ax.locator_params(nbins=1, axis='y')
            if nu0 == 0.005 and tau == 100 and s == 0:
                ax.legend(['Markov Chain', 'Brownian Motion', 'Empirical Distribution'],
                          fontsize=fontsize)
            # Keep every other x tick.
            ax.set_xticks(ax.get_xticks()[::2])
            # ax.set_xticklabels(map(str,tick))
            pplt.annotate('(' + title + ')', fontsize=fontsize, ax=ax)
            pplt.setSize(ax, fontsize)
        axr[0].set_ylabel(r'$P(\nu_\tau|\nu_0={},s={}$)'.format(nu0, s), fontsize=fontsize + 2,
                          rotation=0, labelpad=30)
        # ax.text(0.0,0.0,)
    for tau, ax in zip([1, 10, 100], axes[0]):
        ax.set_title(r'$\tau={}$'.format(tau), fontsize=fontsize)
    # ``axr`` still refers to the last (bottom) row after the loop.
    for ax in axr:
        ax.set_xlabel(r'$\nu$', fontsize=fontsize)
    plt.gcf().tight_layout(pad=0.1, rect=[0.05, 0, 1, 1])
    pplt.savefig('markovDists', 300)
    plt.show()
def scanSFSSNPbased():
    """Scan the absolute scores in SNP-count windows of 200, 500 and 1000 and plot the results.

    The three scans are multiplied into a 'comb' column.  Figures written
    under ``utl.paperPath + 'new/'``: 'SNPbased' (all columns),
    'SNPbased.candidates' (local outliers of 'comb' with q=0.9) and one
    'SNPbased.fdr<level>' per level in 0.95, 0.99, 0.999, using the cut-offs
    returned by ``FDR``.
    """

    def save_manhattan(data, name, height, **extra):
        # Shared layout for every Manhattan figure produced here.
        fig = plt.figure(figsize=(7, height), dpi=300)
        pplt.Manhattan(data=data, fig=fig, markerSize=2, ticksize=8, sortedAlready=True, **extra)
        for axis in fig.get_axes():
            pplt.setSize(axis, 5)
        plt.gcf().subplots_adjust(bottom=0.15)
        plt.savefig(utl.paperPath + 'new/{}.pdf'.format(name))

    scores = rutl.loadScores(skipHetChroms=True)
    # field = comale;
    # df = sort(utl.scanGenome(scores.abs(), {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}))[
    #     [field, 'Num. of SNPs']]
    # plotOne(df, df[df[field] > df[field].quantile(0.99)], fname='all')
    reload(rutl)
    reload(pplt)
    reload(utl)
    # SFSelect = lambda x: est.Estimate.getEstimate(x=x, method='SFSelect', n=100)
    # sfs0 = utl.scanGenomeSNP(rutl.getNut(0, skipHetChroms=True), SFSelect)
    # sfst = utl.scanGenomeSNP(rutl.getNut(59, skipHetChroms=True), SFSelect).rename(59); sfs=(sfst-sfs0); sfs[sfs<0]=None

    # The next five locals are computed but not read again in this function.
    g = ga.loadGeneCoordinates().set_index('name')
    genes = g.loc[['Ace', 'Cyp6g1', 'CHKov1']].reset_index().set_index('CHROM')
    shade = scores.sort_values().reset_index().iloc[-2:].rename(columns={'POS': 'start'})
    shade['end'] = shade.start + 100
    cand = pd.concat([scores, scores.rank(ascending=False).rename('rank'),
                      rutl.getNut(0, skipHetChroms=True)], axis=1).sort_values('rank')
    chroms = ['2L', '2R', '3L', '3R']
    reload(utl)
    # reload(pplt);pplt.Genome(sfs.loc[chroms],genes=genes);plt.tight_layout(pad=0.1)

    # (window size in SNPs, skipFromFirst) for the three scans.
    window_specs = [(200, 900), (500, 750), (1000, 500)]
    scans = []
    for win, skip in window_specs:
        scan = utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=win, step=100,
                                 skipFromFirst=skip)
        scans.append(scan.rename(win))
    df = pd.concat(scans, axis=1)
    df['comb'] = df[200] * df[500] * df[1000]
    save_manhattan(sort(df.rename(columns={'comb': '200*500*1000'})), 'SNPbased', 4.5)

    pplt.Genome(df.comb)
    plt.tight_layout(pad=0.1)
    # analyzie()
    # scanSFS()
    # outlier()
    # scanSFSSNPbased()
    a = df.comb
    o = localOutliers(a, q=0.9)
    save_manhattan(a, 'SNPbased.candidates', 1.5, Outliers=pd.DataFrame(o))

    # Absolute score plus the within-group running position 'i' of every SNP.
    position = scores.groupby(level=0).apply(
        lambda x: pd.Series(range(x.size), index=x.loc[x.name].index)).rename('i')
    Scores = pd.concat([scores.rename('scores').abs(), position], axis=1)
    cutoff = FDR(o, Scores)
    a = pd.concat([df, cutoff[cutoff.sum(1) > 0]], axis=1).dropna()
    for fdr in [0.95, 0.99, 0.999]:
        o = a[a.comb > a[fdr]]
        save_manhattan(df.comb, 'SNPbased.fdr{}'.format(fdr), 1.5, Outliers=pd.DataFrame(o))
columns=["time"], ) / 10 ) HMM["n"] = r"$H$" a = pd.concat([cmh, fit, comale, HMM, a])[["n", "time"]] g = a.groupby("n").mean().time.sort_values() gg = g.round(3).reset_index() gg.columns = ["Method", "Avg. Time per Locus"] utl.DataframetolaTexTable(gg, fname=utl.paperFiguresPath + "../tables/times.tex") ticks = [] for k, v in zip(g.index, g.values): ticks += [k] dpi = 300 fig = plt.figure(figsize=(4, 1.5), dpi=dpi) sns.boxplot(x="n", y="time", data=a, linewidth=0.5, whis=100, color="gray") plt.gca().set_yscale("log") plt.xticks(plt.xticks()[0], ticks) plt.ylabel("Time (seconds)") plt.xlabel("Method") pplt.setSize(plt.gca(), 6) plt.gcf().subplots_adjust(bottom=0.25) # plt.locator_params(axis='y',nbins=3) # mpl.rc('ytick', labelsize=6) # plt.tight_layout(h_pad=-1) pplt.savefig("runTime", dpi=dpi) plt.show()
def plotDepth():
    """Plot depth distributions from 'real/CD.F59.df' in four panels and save as 'depth'.

    (A) counts of every depth value, (B) their cumulative sum, (C) counts of
    the per-row minimum depth, (D) their cumulative sum.  Every panel gets a
    dashed vertical line at depth 50.
    """
    sns.set_style("whitegrid", {"grid.color": "1", 'axes.linewidth': .5, "grid.linewidth": ".09"})
    sns.set_context("notebook", font_scale=1.4, rc={"lines.linewidth": 2.5})
    d = pd.read_pickle(utl.outpath + 'real/CD.F59.df').xs('D', level='READ', axis=1)
    (d.min(1) > 50).sum()  # result is discarded
    (d > 50).sum().sum()  # result is discarded
    z = pd.Series(np.ndarray.flatten(d.values))
    fontsize = 6
    mpl.rcParams.update({'font.size': fontsize})

    def finish_panel(tag):
        # Panel letter, the depth-50 marker and font sizing for the current axes.
        pplt.annotate(tag, xpad=0.85, ypad=0.45, fontsize=fontsize)
        plt.axvline(50, linestyle='--', linewidth=1, color='k')
        pplt.setSize(plt.gca(), fontsize)

    plt.figure(figsize=(6, 4), dpi=300)

    # (A) all measurements
    plt.subplot(2, 2, 1)
    z.value_counts().sort_index().plot()
    plt.xlim([0, 200])
    plt.xlabel('Depth')
    plt.ylabel('Number of Measurments' + '\n (out of {:.1f}M)'.format(z.shape[0] / 1e6))
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    plt.title('Scaled PDF')
    finish_panel('(A)')

    # (B) cumulative version of (A)
    plt.subplot(2, 2, 2)
    z.value_counts().sort_index().cumsum().plot()
    plt.xlim([0, 200])
    plt.ylim([-3e5, 2.05 * 1e7])
    plt.xlabel('Depth')
    plt.title('Scaled CDF')
    finish_panel('(B)')

    # (C) minimum depth per row
    plt.subplot(2, 2, 3)
    d.min(1).value_counts().sort_index().plot()
    plt.xlim([0, 100])
    plt.xlabel('Minimum Depth of each Variant')
    plt.ylabel('Number of Variants' + '\n (out of {:.1f}M)'.format(d.shape[0] / 1e6))
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    plt.rc('font', size=fontsize)
    finish_panel('(C)')

    # (D) cumulative version of (C)
    plt.subplot(2, 2, 4)
    d.min(1).value_counts().sort_index().cumsum().plot()
    plt.xlim([0, 60])
    plt.ylim([0.25 * -1e5, plt.ylim()[1]])
    plt.xlabel('Minimum Depth of each Variant')
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    finish_panel('(D)')

    plt.gcf().subplots_adjust(bottom=0.15)
    plt.gcf().tight_layout(h_pad=0.1)
    fontsize = 6
    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': fontsize})
    mpl.rc('text', usetex=True)
    mpl.rcParams.update({'font.size': 1})
    pplt.savefig('depth', 300)
    plt.show()
# --- Script section: ROC illustration saved as 'powerROC' --------------------
# Draws the data returned by evl.randomROCData() twice: once with FPth=1
# (left) and once with plotROC's default arguments (right).
pd.options.display.max_rows = 20
pd.options.display.expand_frame_repr = False
import popgen.TimeSeries.RNN.Evaluate as evl
import seaborn as sns
import popgen.Plots as pplt

df = evl.randomROCData()
sns.set_style("white", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"})
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1})
mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': 6})
mpl.rc('text', usetex=True)
reload(evl)
dpi = 300
plt.figure(figsize=(4, 2), dpi=dpi)

# Left panel.
plt.subplot(1, 2, 1)
evl.plotROC(df, FPth=1)
plt.ylabel('True Positive Rate (TPR)')
plt.xlabel('False Positive Rate (FPR)')
pplt.setSize(plt.gca())

# Right panel, which also carries the legend.
plt.subplot(1, 2, 2)
evl.plotROC(df)
plt.xlabel('False Positive Rate (FPR)')
pplt.setSize(plt.gca())
plt.legend(['ROC Curve', 'Random Hypothesis', 'FPR Cutoff'], loc='upper left', fontsize=6)

plt.tight_layout(pad=0.1)
pplt.savefig('powerROC', dpi)
plt.show()
# --- Script section: the two top-scoring sites, saved as 'topSNPs' -----------
# Left panel: the site with the largest value in a[0.5].
# Right panel: the site with the largest df.lrdiff.
a = rutl.loadAllScores().groupby(level='h', axis=1).apply(rutl.HstatisticAll)
df = pd.read_pickle(utl.outpath + 'real/scores.df')
i = df.lrd.sort_values().index[-1]
df.loc[i]  # result is discarded
cd = pd.read_pickle(utl.outpath + 'real/CD.F59.df')
import popgen.Plots as pplt
import pylab as plt

names = rutl.loadSNPIDs()
sns.set_style("white", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"})
mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
mpl.rc('text', usetex=True)
reload(pplt)
# NOTE: this rebinds the module-level name `f` to the figure object.
f, ax = plt.subplots(1, 2, sharey=True, dpi=300, figsize=(4, 2))

i = a[0.5].sort_values().index[-1]
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})
pplt.plotSiteReal(cd.loc[i], ax=ax[0], legend=True)
ax[0].set_title('{}:{:.0f} ({})'.format(i[0], i[1], names.loc[i]), fontsize=8)

i = df.lrdiff.sort_values().index[-1]
pplt.plotSiteReal(cd.loc[i], ax=ax[1])
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})
ax[1].set_title('{}:{:.0f} ({})'.format(i[0], i[1], names.loc[i]), fontsize=8)

plt.gcf().subplots_adjust(bottom=0.2)
pplt.savefig('topSNPs', 300)
plt.show()
# --- Script section: bar/line comparison of two SFS curves, saved as 'sfs' ---
# Column 1 (bars) holds the value counts of the column sums of the loaded
# simulation's H0; column 0 (red line) holds (201 - i) / i for i = 1..200.
sns.set_style("whitegrid", {"grid.color": ".9", 'axes.linewidth': .5, "grid.linewidth": ".09"})
mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size':50})
mpl.rc('text', usetex=True)
sfs = Simulation.Simulation.Load().H0.sum().value_counts().sort_index()
# Make sure count 0 is present (with value 0) and re-sort.
sfs.loc[0] = 0
sfs.sort_index(inplace=True)
# f=utl.simoutpath+'TimeSeries/msms/'+'L50K.0000.msms'
ticks = np.array([1, 100, 199])
tickss = map(lambda x: '{}/200'.format(x), ticks)
i=np.arange(1,201)
plt.figure(figsize=(7, 2), dpi=300)
df = pd.concat([pd.Series(i[::-1] / (i * 1.0), index=i), sfs], axis=1)
df[1].plot(kind='bar', alpha=0.6)
df[0].plot(color='red', lw=0.7)
plt.ylim([0, 220])
plt.xlim([0, 200])
plt.xlabel('Frequency')
plt.ylabel('Num. of Variants')
plt.xticks(ticks, tickss)
fontsize = 8
pplt.setSize(plt.gca(), fontsize)
plt.legend(['Empirical SFS', 'Theoretical SFS'], prop={'size': fontsize})
plt.gcf().subplots_adjust(bottom=0.25)
# plt.tight_layout()
plt.grid(False)
pplt.savefig('sfs', 200)
plt.show()
for i in range(max_gen): p=x[-1];q=1-p x+=[(w11*p*p+w01*p*q)/(w11*p*p+2*w01*p*q+w00*q*q)] return pd.Series(x) df=[];dom=[] H = [0, 0.5, 1, 2] index = map(lambda h: '$h$={}'.format(h), H) max_gen = 200 for h in H: df+=[f(x0,s,h,max_gen)] dom+=[(1+s,1+h*s, 1)] df=pd.DataFrame(df,index=index).T dom=pd.DataFrame(dom,index=index,columns=['AA','aA','aa']).T.iloc[::-1] plt.figure(figsize=(4, 2.5), dpi=dpi) # ax=plt.subplot(1,3,1); df.plot(ax=plt.gca(), linewidth=1, legend=False, color=pplt.getColorMap(len(H))); plt.ylim([0, 1.01]) plt.xlabel('Generations'); plt.ylabel('Carrier Frequency') # pd.Series(utl.sig((np.arange(max_gen)+1)*s/2 + utl.logit(x0))).plot(style='--',linewidth=2,color='k') plt.grid() # ax=plt.subplot(1,3,2); # df=2*df*(1-df) # df.plot(ax=ax,linewidth=2,legend=False);plt.xlabel('Generations');plt.ylabel('Heterozygosity ($2pq$)') # plt.grid();plt.ylim([0,0.55]) # ax=plt.subplot(1,3,3); # dom.plot(ax=ax,grid=True,linewidth=2);plt.xlabel('Genotype');plt.ylabel('Relative Fitness') plt.legend(loc='best'); plt.gca().locator_params(nbins=3); plt.gcf().subplots_adjust(bottom=0.2)
df = pd.concat([a[col].rank(ascending=False).loc[v.FBID.values] for col in paddings], axis=1) pvals = a[paddings].apply(lambda xy: getPvalAUC(xy, v)) # df = pd.DataFrame(a[col].rank(ascending=False).loc[v.FBID.values]) dff = pd.DataFrame( [pd.Series(df[col].sort_values(ascending=False).values, index=range(1, df.shape[0] + 1), name=col) for col in df.columns]).T dff.columns = map(lambda x: 'AUC={}, {}'.format(1 - np.round(x[3] / Genes.shape[0], 2), pvals[x[0]]), zip(dff.columns, dff.median(), dff.min(), dff.mean())) dff = N - dff if df.shape[1] == 1: dff.plot(ax=plt.gca(), color='r') else: dff.plot(ax=plt.gca(), colors=pplt.getColorMap(df.shape[1]), rotation=90) # plt.title('padding={:.0f}K'.format(padding / 1000)) plt.plot([1, df.shape[0]], [1, N], ls="--", c=".3") xticks = np.unique(np.append((np.round(plt.xticks()[0][:-1])).astype(int), df.shape[0])) if v.shape[0] < 10: plt.xticks(xticks, v.name, rotation=30) else: plt.xticks(xticks) yticks = plt.yticks()[0] # yticks=yticks.max()-yticks plt.xlim([1, df.shape[0]]) # plt.yticks(yticks,N-yticks) plt.ylim([0, 16995]) plt.yticks([0, 2000, 7000, 12000, 17000], [17000, 15000, 10000, 5000, 0])
def plotPowerCLRQ(recompute=False):
    """Plot bootstrap-averaged HMM power against the quantile cut-off and save as 'CLRQ'.

    Args:
        recompute: when true, rebuild the quantile table (for the raw and the
            absolute-valued scores, flagged by the 'ModifiedLR' level) and the
            100 bootstrap replicates, overwriting 'ROC/PowerCLRTable.df' and
            'ROC/PowerCLRTableBootstrap.df'; otherwise load both files.

    The figure has one row per nu0 (0.005 then 0.1) and one column per depth
    (30, 100, 300).  Relies on the module-level ``f``, ``loadHMMAllDepths``
    and ``getPower``.
    """
    dpi = pplt.PLOS.dpi
    fontsize = 7
    sns.set_style("whitegrid", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": ".09"})
    if recompute:
        a = f(loadHMMAllDepths())
        a = a[a.index.get_level_values('coverage') != np.inf]
        # 0, 0.1, ..., 1 plus 0.91, ..., 0.99.
        Q = np.sort(np.append(np.arange(0, 1.01, 0.1), 0.9 + np.arange(0, 1, 0.1)[1:] / 10))
        # Q = [0, 0.5,0.9,0.95,0.96,0.97,0.98, 0.99, 1]
        levels6 = list(range(6))

        def tail_means(scores):
            # One column per q: mean of the values at or above the q-quantile.
            return pd.concat(
                [scores.groupby(level=levels6).apply(lambda x, q=q: x[x >= x.quantile(q)].mean())
                 for q in Q], axis=1)

        new_order = [0, 6, 7] + list(range(1, 6))
        df = tail_means(a)
        dfa = tail_means(a.abs())
        df.columns = pd.MultiIndex.from_product([Q, [False]], names=['Quantile', 'ModifiedLR'])
        df = df.stack(df.columns.names).reorder_levels(new_order)
        dfa.columns = pd.MultiIndex.from_product([Q, [True]], names=['Quantile', 'ModifiedLR'])
        dfa = dfa.stack(dfa.columns.names).reorder_levels(new_order)
        df = pd.concat([df, dfa])
        df.to_pickle(utl.outpath + 'ROC/PowerCLRTable.df')
        df = df[df.index.get_level_values("coverage") != np.inf]
        # 100 bootstrap replicates, each 250 distinct ids drawn from range(1000).
        boot = pd.DataFrame([np.sort(np.random.choice(1000, 250, replace=False)) for _ in range(100)]).T
        print(boot)
        dfboot = boot.groupby(level=0, axis=1).apply(
            lambda x: getPower(df.loc[pd.IndexSlice[:, :, :, :, :, :, :, x[x.name].values]].sort_index(),
                               groupbyLevels=levels6).xs('HMM', level='method')
        ).groupby(level=list(range(4))).mean()
        print(dfboot)
        dfboot.columns.name = 'i'
        dfboot = dfboot.stack('i').reset_index(['i', 'ModifiedLR', 'Quantile'])
        print(dfboot)
        dfboot.to_pickle(utl.outpath + 'ROC/PowerCLRTableBootstrap.df')
    else:
        df = pd.read_pickle(utl.outpath + 'ROC/PowerCLRTable.df')
        dfboot = pd.read_pickle(utl.outpath + 'ROC/PowerCLRTableBootstrap.df')
    # Round before casting: a bare astype(int) truncates float noise
    # (e.g. 28.999999999999996 would become 28 instead of 29).
    dfboot.Quantile = (dfboot.Quantile * 100).round().astype(int)
    sns.set_context(rc={"lines.linewidth": 0.5})
    pistar = {}
    ABCD = ['({})'.format(x) for x in 'ABCDEFG']
    row_nu0 = [0.005, 0.1]
    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=dpi)
    j = 0
    for nu0, axs in zip(row_nu0, axes):
        for depth, ax in zip([30, 100, 300], axs):
            a = dfboot.loc[depth].loc[nu0]
            if nu0 == 0.005:
                ax.set_title(r'$\lambda$={}'.format(str(depth)).replace('inf', r'$\infty$'))
            sns.tsplot(data=a, time='Quantile', unit='i', value=0, condition='ModifiedLR', ci=99.99,
                       legend=False, color=['r', 'darkblue'], ax=ax)
            # Quantile with the highest mean power in this panel.
            pistar.update({ax: (ABCD[j], r'($\pi^*=${})'.format(a.groupby('Quantile')[0].mean().idxmax()))})
            pplt.setSize(ax, fontsize)
            ax.set_xlabel('')
            ax.set_ylabel('')
            j += 1
    axes[0][0].locator_params(nbins=3)
    for ax in axes[1]:
        ax.set_xlabel(r'$\pi$')
    # Label each row with its own sweep type.  The previous code formatted both
    # labels with the leaked loop variable ``nu0`` (0.1 after the loop), which
    # tagged the nu0=0.005 row as 'Soft' as well.
    for nu0_of_row, ax in zip(row_nu0, [axes[0][0], axes[1][0]]):
        ax.set_ylabel('Avg. Power\n({} Sweep)'.format(('Soft', 'Hard')[nu0_of_row == 0.005]))
    plt.gcf().subplots_adjust(bottom=0.2)
    # [pplt.annotate(v[1],ax=k,fontsize=fontsize) for k,v in pistar.items() ]
    for j, panel in enumerate(axes.reshape(-1)):
        pplt.annotate('({})'.format(list('ABCDEF')[j]), ax=panel, fontsize=7)
    for panel, label in pistar.items():
        pplt.annotate(label[1], ax=panel, fontsize=7, xpad=0.6)
    axes[1][-1].legend([r'$\mathcal{H}$', r'$\mathcal{H}^+$'], loc='lower right', prop={'size': fontsize})
    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': fontsize})
    mpl.rc('text', usetex=True)
    pplt.savefig('CLRQ', dpi)
    plt.show()
# --- Script section: interactive exploration of top-scoring sites ------------
# Scratch-style statements; several lines are bare expressions whose values
# are only useful in an interactive session.
# NOTE(review): `x` in `pplt.Manhattan(x)` is not assigned in this section
# (only `X` is) - confirm it is defined elsewhere.
reload(pplt)
b=a[a>20].rename('score')#.iloc[:150]
ann=pd.read_pickle('/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Dmelanogaster/Hypoxia/pops/all.ANN.df')["ID Annotation Annotation_Impact Gene_Name Gene_ID REF Allele REF_flybaseVCF ALT".split()].reset_index().drop_duplicates().set_index(['CHROM','POS'])
ann.loc[('2L',10558452)]
# Inner-join the score with the per-site tables, then attach annotations.
d=pd.DataFrame(b).join(L17,how='inner').join(dominace,how='inner').join(x0,how='inner').join(x17,how='inner').join(xt,how='inner').join(ann).join(cd,how='inner').sort_values('L17')
z=(H-C).apply(lambda x: np.exp(x/10))
pplt.GenomeChromosomewise(z[z>2])
e=pd.DataFrame(z).join(ann).sort_values(0)
e
d
d.loc['3R'].loc[5663533]
pplt.Manhattan(L17)
reload(pplt)
# Last row of `d`, i.e. the largest 'L17' after the sort above.
pplt.plotSiteReal(cd.loc[d.index[-1]])
o=b.sort_values().iloc[-10:]
hutl.load()['L'][180].groupby(level=0,axis=1).apply(lambda x: x[x.name].C/x[x.name].D).loc[d.index[-1]]
pplt.GenomeChromosomewise(b,outliers=o)
pplt.Manhattan(L)
a=scan.L.copy(True)
a=pd.DataFrame(a[a.index.get_level_values('CHROM')=='3R']).iloc[10000:15000]
X=a.loc['3R']
o=utl.localOutliers(scan.L)
pplt.Manhattan(x)
reload(utl)
i=X.idxmax()
pad=10000
X.shape
def plotRank():
    """Plot the empirical CDF of the causal site's rank for every method.

    One figure is produced per first-level key of the rank table (printed and
    used in the file name as ``depth``).  Each figure is a 2x4 grid that is
    filled, panel by panel, in the order the (nu0, s) groups are iterated; the
    figure is saved with ``pplt.savefig('rank<depth>', dpi)`` and a table of
    the per-panel area summaries is printed.

    NOTE(review): the rank table is read from the module-level name ``a``.
    The statement that used to build it from the ``ROC/ranks.*.df`` pickles is
    commented out below, so ``a`` must exist before this function is called.
    """

    def computeRanks():
        """Recompute and pickle the rank tables (GP and CMH only).

        Not called by default; the call below is commented out.
        """
        print('ranking...')

        def ff(x):
            # Descending rank within the first five index levels; keep the causal rows.
            return x.groupby(level=list(range(5))).rank(ascending=False).xs(True, level='causal')

        def removeINF(x):
            # Drop the rows whose 'coverage' level is infinite.
            return x[x.index.get_level_values('coverage') != np.inf]

        def positive(x):
            # Keep label == 1 and treat missing scores as 0.
            return x.xs(1, level='label').fillna(0)

        # ff(positive( f(pd.read_pickle(utl.outpath + 'ROC/HMM')))).to_pickle('{}ROC/ranks.HMM.df'.format(utl.outpath))
        ff(positive(removeINF(pd.read_pickle(utl.outpath + 'ROC/GP'))).LR).to_pickle(
            '{}ROC/ranks.GP.df'.format(utl.outpath))
        # The FIT scores are loaded and their missing values replaced by uniform
        # random numbers, but the line that would rank and store them is disabled.
        fit = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT']
        fit[fit.isnull()] = np.random.rand(fit.isnull().sum())
        # ff(fit).to_pickle('{}ROC/ranks.FIT.df'.format(utl.outpath))
        ff(positive(removeINF(pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH']))).to_pickle(
            '{}ROC/ranks.CMH.df'.format(utl.outpath))

    # computeRanks()
    print('plotting...')
    # a = pd.concat(map(lambda x: pd.read_pickle('{}ROC/ranks.{}.df'.format(utl.outpath, x)), ['CMH', 'HMM', 'GP']))
    fontsize = 7
    dpi = 300

    def addlast(df):
        # Append a point at rank 1200 with value 1 so every CDF ends at 1,
        # then strip the group-key levels from the index.
        df[df.name + (1200,)] = 1
        return df.loc[df.name]

    def fil(x):
        # Carry the last known value forward; ranks before the first
        # observation get 0.
        return x.ffill().fillna(0)

    for depth, _ in a.groupby(level=0):
        print(depth)
        AUC = []
        # Cumulative fraction of sites at or below each rank.  Divide by the
        # scalar count rather than by the shape tuple.
        dists = a.loc[depth].groupby(level=[0, 2, 1]).apply(
            lambda df: df.value_counts().sort_index().cumsum() / float(df.shape[0]))
        dists = dists.groupby(level=list(range(3))).apply(addlast)
        _, axes = plt.subplots(2, 4, figsize=(7, 3), dpi=dpi, sharey=True, sharex=True)
        axes = axes.reshape(-1)
        j = 0  # index of the panel being drawn
        for nu0, dfnu in dists.groupby(level=0):
            for s, df in dfnu.loc[nu0].groupby(level=0):
                df = df.loc[s]
                df = df.unstack(level='method')  # .rename(columns={'HMM': r'$\mathcal{H}$'})
                df = df.apply(fil)
                # Rank-weighted sum of each method's CDF, normalised by the sum of the ranks.
                auc = (df.apply(lambda x: x.dot(df.index.values)) / np.sum(df.index.values)).rename(
                    (depth, nu0, s))
                AUC.append(auc)
                color = fixColor(pd.DataFrame(None, index=df.columns)).loc[df.columns.values, 'color'].tolist()
                df.columns = [y.replace('HMM', comaleName) for y in df.columns]
                if df.shape[0] == 2:
                    df.index = np.ceil(df.index.values)
                df.plot(color=color, ax=axes[j], lw=1, legend=False)
                axes[j].set_ylim([-0.02, 1.02])
                # pplt.annotate('$s$={}'.format(s), xpad=0.6, ypad=0.25, fontsize=fontsize + 1, ax=axes[j])
                axes[j].set_title('$s$={}'.format(s), fontsize=fontsize + 1)
                if j > 3:
                    axes[j].set_xlabel('Rank', fontsize=fontsize)
                axes[j].set_ylabel(r'CDF ({} Sweep)'.format(('Soft', 'Hard')[nu0 == 0.005]),
                                   fontsize=fontsize)
                axes[j].locator_params(axis='x', nbins=5)
                # Style is re-applied after every panel, exactly as before, so
                # it is in force when the next panel is drawn.
                pplt.setStyle(lw=1, fontsize=fontsize, fontscale=0.1)
                mpl.rcParams.update({'font.size': 2})
                mpl.rc('xtick', labelsize=6)
                mpl.rc('ytick', labelsize=6)
                if j == 7:
                    axes[j].legend(loc='lower right', fontsize=fontsize)
                j += 1
        # plt.tight_layout(pad=0.1)
        plt.xlim([0, 1200])
        plt.gcf().subplots_adjust(bottom=0.15)
        print(pd.concat(AUC, axis=1).round(2).T.reset_index())
        print(depth)
        pplt.savefig('rank{}'.format(depth), dpi)
def plotBias():
    """Draw the 2x2 violin-plot figure of estimation bias, once per depth.

    For each depth in (30, 100, 300) the function reads ``ROC/bias.df`` and
    ``ROC/bias.h.df`` below ``utl.outpath`` and draws:

    * top row: bias of ``s`` for both methods, one panel per ``nu0``
      (split violins);
    * bottom row: bias of ``h`` for the ``comaleName`` method only.

    It then writes a mean/std LaTeX table with ``utl.DataframetolaTexTable``
    and saves the figure as ``bias.<depth>`` through ``pplt.savefig``.
    """

    def computeBias():
        """Recompute ``ROC/bias.df`` (s - s_hat); not called by default."""
        print('computing bias...')
        hmm = pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'HMM')).s
        gp = pd.read_pickle(utl.outpath + 'ROC/GP.causal').s
        b = pd.concat([hmm, gp]).sort_index().xs(True, level='causal')
        # The last level of each group key minus the estimates in that group.
        bias = b.groupby(level=list(range(4))).apply(lambda x: x.name[-1] - x)
        bias.to_pickle('{}ROC/bias.df'.format(utl.outpath))

    def biash():
        """Derive ``ROC/bias.h.df`` from the HMM rows of ``ROC/bias.df``.

        Adds standard-normal noise, rescales, and centres every group.
        Not called by default.
        """
        a = pd.read_pickle('{}ROC/bias.df'.format(utl.outpath))
        a = a[a.index.get_level_values('method') == 'HMM']
        a = a + np.random.randn(a.size)
        a /= 10
        a[a.index.get_level_values('nu0') == 0.1] /= 2
        # Centre each group on zero.  The previous in-place `g -= g.mean()` on
        # the objects yielded by groupby never reached `a`.
        a = a - a.groupby(level=list(range(4))).transform(lambda g: g.mean())
        a.to_pickle('{}ROC/bias.h.df'.format(utl.outpath))

    def loadBias(fname, depth):
        # Read one bias table once, rename the HMM method to its display name
        # and return the 's' column with the original index levels restored.
        raw = pd.read_pickle(utl.outpath + 'ROC/' + fname).loc[depth]
        flat = raw.reset_index()
        flat.method = flat.method.replace({'HMM': comaleName})
        return flat.set_index(raw.index.names).sort_index().s

    def violinData(series, nu0):
        # Long-form frame (bias, method, s) for one nu0, as seaborn expects.
        frame = pd.DataFrame(series.loc[nu0])
        frame.columns = ['bias']
        frame['method'] = frame.index.get_level_values('method')
        frame['s'] = frame.index.get_level_values('S')
        return frame

    fontsize = 6
    # computeBias()
    dpi = 300
    for depth in [30, 100, 300]:
        reload(pplt)
        plt.figure(figsize=(5, 5), dpi=dpi)
        df = loadBias('bias.df', depth)
        dfh = loadBias('bias.h.df', depth)
        # Small uniform jitter in [-0.005, 0.005) on the nu0 = 0.1 rows of the renamed method.
        df[(0.1, comaleName)] += np.random.rand(df[(0.1, comaleName)].shape[0]) / 100 - 0.005
        # df[(0.005, 'HMM')] += np.random.rand(df[(0.005, 'HMM')].shape[0]) / 100 - 0.005
        ax = []

        # Top row (panels 0 and 1): bias of s, both methods.
        for j, nu0 in enumerate([0.005, 0.1]):
            if j == 0:
                ax.append(plt.subplot(2, 2, j + 1))
            else:
                ax.append(plt.subplot(2, 2, j + 1, sharex=ax[-1], sharey=ax[-1]))
            a = violinData(df, nu0)
            sns.violinplot(x="s", y="bias", hue="method", data=a, linewidth=1,
                           palette={comaleName: "r", "GP": "darkblue"}, split=True, ax=ax[j])
            ax[j].set_title(r'$\nu_0=${} ({} Sweep)'.format(nu0, ('Soft', 'Hard')[nu0 == 0.005]),
                            fontsize=fontsize + 2)
            # pplt.annotate(r'$\nu_0=${} ({} Sweep)'.format(nu0, ('Soft', 'Hard')[nu0 == 0.005]),xpad=0.05,ypad=1,fontsize=fontsize)
            plt.locator_params(axis='y', nbins=5)
            pplt.setSize(plt.gca(), fontsize)
            ax[j].set_xlabel('$s$', fontsize=fontsize + 2)
            # plt.xlabel('$s$', fontsize=fontsize + 2)
            ax[j].legend(title='', loc='upper right', fontsize=fontsize + 2)
            ax[j].set_ylabel((r'Bias ($s-\hat{s}$)', '')[j % 2], fontsize=fontsize + 2)
            if j != 1:
                # Only the right-hand panel keeps its legend.
                ax[j].legend_.remove()
            pplt.annotate(ABCD[j], fontsize=fontsize + 2, ax=ax[j])

        # Bottom row (panels 2 and 3): bias of h, renamed method only.
        for j, nu0 in enumerate([0.005, 0.1], 2):
            if j == 2:
                ax.append(plt.subplot(2, 2, j + 1))
            else:
                ax.append(plt.subplot(2, 2, j + 1, sharex=ax[-1], sharey=ax[-1]))
            a = violinData(dfh, nu0)
            sns.violinplot(x="s", y="bias", hue="method", data=a[a['method'] == comaleName], linewidth=1,
                           palette={comaleName: "r", "GP": "g"}, ax=ax[j])
            plt.locator_params(axis='y', nbins=5)
            pplt.setSize(plt.gca(), fontsize)
            ax[j].set_xlabel('$h$', fontsize=fontsize + 2)
            ax[j].set_ylabel((r'Bias ($h-\hat{h}$)', '')[j % 2], fontsize=fontsize + 2)
            pplt.annotate(ABCD[j], fontsize=fontsize + 2, ax=ax[j])
            ax[j].legend_.remove()

        # Mean and standard deviation of the s-bias per (method, nu0), for the paper table.
        summary = df.groupby(level=['method', 'nu0']).describe().round(3).unstack(['method', 'nu0']).loc[
            ['mean', 'std']].T.reset_index().sort_values('nu0')
        summary.columns = ['Method', r'$\nu_0$', 'Mean', 'STD']
        utl.DataframetolaTexTable(summary, fname=utl.paperPath + 'tables/bias.{}.tex'.format(depth))
        pplt.savefig('bias.{}'.format(depth), dpi)
# Interactive scratch cell: genome scans of per-SNP scores and a likelihood profile for one site.
# Every name used here (s, scores, y, cdAll, dta, mkv, S, res, ...) must already exist in the
# session; bare expressions have no effect outside an interactive session.
pplt.GenomeChromosomewise(utl.scanGenome(utl.zpvalgenome(s)))
# Side-by-side scan results, named 'win' (scanGenome) and 'snp' (scanGenomeSNP).
scan=pd.concat([utl.scanGenome(utl.zpvalgenome(s)).rename('win'),utl.scanGenomeSNP(utl.zpvalgenome(s)).rename('snp')],1)
pplt.Manhattan(scan)
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(utl.zpvalgenome(s))))
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(scores.abs())))
reload(utl)
pplt.GenomeChromosomewise(utl.scanGenomeSNP(utl.zpvalgenome2tail(s)))
scores.sort_values()
# Scan with a custom statistic: the sum of the values at or above the median.
pplt.GenomeChromosomewise(utl.scanGenomeSNP(scores.abs(),lambda x: x[x>=x.quantile(0.5)].sum()))
# Values and their ranks side by side, sorted by the ('val', 's') column.
df=pd.concat([scores,s],1);df=pd.concat([df,df.rank()],1,keys=['val','rank']).sort_values(('val','s'))
dfy=pd.concat([df,y],1).dropna()
dfy.sort_values(0)
# Take the last row after the sort (largest ('val', 's')) and plot that site.
i=df.index[-1]; cdi=cdAll.loc[i];print cdi.unstack('REP');pplt.plotSiteReal(cdi)
cdiun=cdi.unstack('REP')
CD,E=dta.precomputeCDandEmissionsFor(pd.DataFrame(cdi).T)
h=0.5
reload(mkv)
mkv.computeLikelihoodReal((CD, E, 0, 0.5))
# Likelihood for every value in S (last tuple element fixed at 0.5), indexed by that value.
likes=pd.concat(map(lambda x:mkv.computeLikelihoodReal((CD, E, x, 0.5)),S),keys=S).reset_index().iloc[:,[0,-1]].set_index('level_0')[0]
likes[0]
reload(pplt)
# Left panel: likelihood profile; right panel: the site itself.
plt.figure(figsize=(6,3),dpi=150);plt.subplot(1,2,1);pd.DataFrame(likes).plot(ax=plt.gca());plt.subplot(1,2,2);pplt.plotSiteReal(cdi,ax=plt.gca());print cdi.unstack('REP')
# Reduce `res` to a Series: first column becomes the index, fourth column the values.
res=res.reset_index().iloc[:,[0,3]];res=res.set_index(res.columns[0]).iloc[:,0]
NN=np.arange(100,1500,100)
# Figure 'statePosterior': normalised binomial likelihood of the allele
# frequency nu on a grid, for three (c, d) pairs with the same ratio c/d.
from scipy.special import comb  # scipy.misc.comb is no longer available in current SciPy

# Frequency grid from 0 to 1 in steps of 0.001, indexed by its own values.
_grid = np.arange(0, 1.00001, 0.001)
nu = pd.Series(_grid, index=_grid)


def bio(cd):
    """Return C(d, c) * nu**c * (1 - nu)**(d - c) on the module-level grid ``nu``.

    ``cd`` is a pair ``(c, d)``.
    """
    c, d = cd
    return comb(d, c) * ((nu) ** c) * ((1 - nu) ** (d - c))


# Each curve is normalised to sum to one over the grid; `cd` is scaled in place
# by 10 between curves: (1, 5) -> (10, 50) -> (100, 500).
cd = np.array([1, 5])
a = bio(cd)
a /= a.sum()
cd *= 10
b = bio(cd)
b /= b.sum()
cd *= 10
c = bio(cd)
c /= c.sum()

dpi = 300
plt.figure(figsize=(4, 2), dpi=dpi)
df = pd.DataFrame([a, b, c]).T
df
df.plot(ax=plt.gca())
plt.legend([r'Pr($\nu |c=1,d=5$)', r'Pr($\nu |c=10,d=50$)', r'Pr($\nu|c=100,d=500$)'], fontsize=6)
plt.xlabel(r'$\nu$')
plt.ylabel(r'Pr($\nu|c,d$)')
# Leave a little room below zero and keep the automatic upper limit.
plt.ylim([-0.0005, plt.ylim()[1]])
pplt.setSize(plt.gca(), 6)
plt.gcf().subplots_adjust(bottom=0.15)
pplt.savefig('statePosterior', dpi)
plt.show()
def D(nu, n, W0, Pi0):
    """Return ``-log(1 - nu) * W0 / n - Pi0 * nu ** 2``.

    Works elementwise when ``nu`` is an array or Series.
    """
    return -np.log(1 - nu) * W0 / n - Pi0 * nu ** 2


fontsize = 4
# Reference trajectory drawn as the dashed black curve in both panels.
nu = utl.forward(t=1000, s=0.05, x0=0.05)
plt.figure(figsize=(6, 3), dpi=300)

# Panel (A): nu over generations for `sel` (red) and `neut`, both of which
# must already exist in the session.
plt.subplot(3, 1, 1)
sns.tsplot(sel, time='gen', value='nu', unit='exp', color='red', ci=99)
sns.tsplot(neut, time='gen', value='nu', unit='exp', ci=99)
nu.plot(color='k', linewidth=1, linestyle='--')
pplt.setSize(plt.gca(), fontsize)
plt.ylabel(r'$\nu_t$', fontsize=fontsize + 2)
plt.ylim([0, 1.05])
plt.title(r'(A)', fontsize=fontsize + 2)
# Booleans instead of the strings 'off': current Matplotlib treats any
# non-empty string as true, which would leave the ticks visible.
plt.tick_params(
    axis='x',  # changes apply to the x-axis
    which='both',  # both major and minor ticks are affected
    bottom=False,  # ticks along the bottom edge are off
    top=False,  # ticks along the top edge are off
    labelbottom=False)
plt.xlabel('')

# Second panel: the column 'D' of the same tables, with D(nu) shifted by 0.675 as reference.
plt.subplot(3, 1, 2)
sns.tsplot(sel, time='gen', value='D', unit='exp', color='red', ci=99)
sns.tsplot(neut, time='gen', value='D', unit='exp', ci=99)
(D(nu, 200., 1, 1) + 0.675).plot(c='k', linewidth=1, linestyle='--')
# Figure 'trajectoryReal': observed allele-frequency trajectories of three
# sites picked from the real data set.
home = os.path.expanduser('~') + '/'
import popgen.Util as utl
import popgen.Estimate as est
import popgen.Plots as pplt

cd = pd.read_pickle(utl.outpath + 'real/CD.F59.df').sortlevel()
# C / D for every group formed by the first two column levels.
af = cd.groupby(level=[0, 1], axis=1).apply(lambda g: g[g.name].C / g[g.name].D)
# Row means of the GEN == 0 and GEN == 59 columns.
f0 = af.xs(0, level='GEN', axis=1).mean(1)
f59 = af.xs(59, level='GEN', axis=1).mean(1)
# One rising site, one falling site and one that stays at intermediate frequency.
i = [
    af[(f0 < 0.3) & (f59 > 0.7)].index[0],
    af[(f0 > 0.7) & (f59 < 0.2)].index[-1],
    af[(f0 > 0.4) & (f59 < 0.6)].index[-299],
]
# i [('2L', 2955601), ('3R', 25463358), ('X', 22057437)]
# scores = rutl.loadSNPScores().sort_values('lr', ascending=False)
# scores
reload(pplt)
sns.set_style("white", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"})
mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
mpl.rc('text', usetex=True)
dpi = 300
_, ax = plt.subplots(1, 3, figsize=(6, 2), dpi=dpi, sharex=True, sharey=True)
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})
# One panel per site, titled 'CHROM:POS'; only the first panel asks for a legend.
for _k, _site in enumerate(i):
    _extra = {'legend': True} if _k == 0 else {}
    pplt.plotSiteReal(cd.loc[_site], ax=ax[_k],
                      title='{}:{}'.format(_site[0], _site[1]), **_extra)
ax[0].set_ylabel(r'$\nu_t$')
plt.gcf().subplots_adjust(bottom=0.2)
pplt.savefig('trajectoryReal', dpi)
plt.show()
def plotNull(subp, nu0=0.005, fontsize=5):
    """Draw three panels comparing Brownian, Markov-chain and observed frequency distributions.

    The panels are for tau = 1, 10 and 100 and are drawn into cells
    ``subp[0]``, ``subp[1]`` and ``subp[2]`` of a 3x3 subplot grid on the
    current figure.

    Parameters:
        subp: three 1-based subplot positions; ``subp[k] - 1`` also indexes
            the external ``subptitle`` for the panel title.
        nu0: starting frequency; selects the observation pickle, the row of the
            transition matrix and the hand-set limits in ``dfplt``.
        fontsize: font size passed to the annotation, title and legend calls.

    Statement order matters: each panel's title and ``setSize`` call are issued
    while that panel is still the current axes, i.e. before the next
    ``plt.subplot``.
    """
    obs = pd.read_pickle(utl.outpath + 'markov/neutral.obs.{}.pkl'.format(nu0))
    T = Markov.computeTransition(0, N=1000)
    # Hand-set plotting parameters per (nu0, tau): Brownian curve scale and axis limits.
    dfplt = pd.concat([pd.Series({'scale': 10, 'xlim': [0.0, 0.01], 'ylim': [0, 1]}, name=(0.005, 1)),
                       pd.Series({'scale': 30, 'xlim': [0.06, 0.14], 'ylim': [0, 0.15]}, name=(0.1, 1)),
                       pd.Series({'scale': 30, 'xlim': [0.0, 0.015], 'ylim': [0, 0.3]}, name=(0.005, 10)),
                       pd.Series({'scale': 45, 'xlim': [0.0, 0.2], 'ylim': [0, 0.025]}, name=(0.1, 10)),
                       pd.Series({'scale':30, 'xlim':[0.0,0.03],'ylim': [0,0.2]},name=(0.005,100)),pd.Series({'scale':50, 'xlim':[0.00,0.4],'ylim': [0,0.004]},name=(0.1,100))
                       ],axis=1).T
    # Row nu0 of the transition matrix is the starting curve for tau = 1.
    markov=T.loc[nu0].copy(True);markov.name='Markov Chain'
    xx=np.arange(0,1,0.00001)
    # NOTE(review): the second argument of stats.norm is named sig2 (h*tau/N); if `stats` is
    # scipy.stats, that argument is the scale (standard deviation), not the variance -- confirm.
    N=200; tau=1;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx);
    # Normalise the curve to sum to one over the grid, then apply the hand-set scale.
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion';brownian*=dfplt.loc[(nu0,tau)].scale
    # Applied to whatever axes is current before the first subplot is created.
    pplt.setSize(plt.gca(), fontsize=fontsize)
    # ---- panel for tau = 1 ----
    plt.subplot(3, 3, subp[0]);
    brownian.plot(color='r');
    markov.plot(color='b');
    # Relative frequency of each distinct value in obs.X[1].
    o=pd.Series(obs.X[1].flatten()).value_counts().sort_index();o=o/o.sum();
    # For nu0 == 0.1 a histogram with a hand-set divisor is used instead.
    if nu0==0.1: counts,limits=np.histogram(obs.X[1].flatten(),bins=500,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*4)
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim); plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), fontsize=fontsize)
    plt.ylabel(r'$P(\nu_\tau|\nu_0)$')
    # ---- prepare tau = 10: nine more applications of T ----
    tau=10
    for _ in range(9): markov=markov.dot(T)
    N=200;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx)
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion'; brownian*=dfplt.loc[(nu0,tau)].scale
    # Still acting on the tau = 1 panel: size and title are set before switching axes.
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[0] - 1]), fontsize=fontsize)
    # ---- panel for tau = 10 ----
    plt.subplot(3, 3, subp[1]);
    brownian.plot(color='r');
    markov.plot(color='b');
    o=pd.Series(obs.X[10].flatten()).value_counts().sort_index();o=o/o.sum();
    if nu0==0.1: counts,limits=np.histogram(obs.X[10].flatten(),bins=100,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*20)
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim); plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), loc=1, fontsize=fontsize)
    pplt.setSize(plt.gca(), fontsize=fontsize)
    # ---- prepare tau = 100: ninety more applications of T ----
    tau=100
    for _ in range(90): markov=markov.dot(T)
    N=200;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx)
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion'; brownian*=dfplt.loc[(nu0,tau)].scale
    # Title of the tau = 10 panel, set before switching axes.
    plt.title('({})'.format(subptitle[subp[1] - 1]), fontsize=fontsize)
    # ---- panel for tau = 100 ----
    plt.subplot(3, 3, subp[2]);
    brownian.plot(color='r');
    markov.plot(color='b')
    o=pd.Series(obs.X[100].flatten()).value_counts().sort_index();o=o/o.sum();
    if nu0==0.1: counts,limits=np.histogram(obs.X[100].flatten(),bins=30,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*60)
    # Only this last observation curve is given a name (used as its legend label).
    o.name = 'Observation';
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim); plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), loc=1, fontsize=fontsize)
    # The legend is drawn only when this panel sits in grid cell 3.
    if subp[2] == 3: plt.legend(loc='center right', fontsize=fontsize)
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[2] - 1]), fontsize=fontsize)