示例#1
0
home = os.path.expanduser('~') + '/'
import popgen.Util as utl
import popgen.Estimate as est
import popgen.Plots as pplt


def _c_over_d(group):
    """Return the C / D ratio for one column group handed over by groupby.apply."""
    block = group[group.name]
    return block.C / block.D


# Load the pickled table and reduce every column group (first two column
# levels) to a single C / D ratio column.
cd = pd.read_pickle(utl.outpath + 'real/CD.F59.df').sortlevel()
af = cd.groupby(level=[0, 1], axis=1).apply(_c_over_d)

# Row-wise mean of the ratio over the columns with GEN == 59 and GEN == 0.
f59 = af.xs(59, level='GEN', axis=1).mean(1)
f0 = af.xs(0, level='GEN', axis=1).mean(1)

# Three example rows, chosen by where the mean ratio starts (GEN 0) and ends (GEN 59).
low_to_high = (f0 < 0.3) & (f59 > 0.7)
high_to_low = (f0 > 0.7) & (f59 < 0.2)
above_04_to_below_06 = (f0 > 0.4) & (f59 < 0.6)
i = [
    af[low_to_high].index[0],
    af[high_to_low].index[-1],
    af[above_04_to_below_06].index[-299],
]
# Original author's note: i == [('2L', 2955601), ('3R', 25463358), ('X', 22057437)]

# Pick up any edits made to the plotting module during the session.
reload(pplt)

# Figure styling: white seaborn theme, serif Computer Modern font, LaTeX text rendering.
sns.set_style(
    "white",
    {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"},
)
mpl.rc('font', family='serif', serif=['Computer Modern'])
mpl.rc('text', usetex=True)

dpi = 300
_, ax = plt.subplots(1, 3, figsize=(6, 2), dpi=dpi, sharex=True, sharey=True)
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})

# One panel per selected row; only the first panel asks for a legend.
for panel_no, site in enumerate(i):
    panel_kwargs = {'legend': True} if panel_no == 0 else {}
    pplt.plotSiteReal(
        cd.loc[site],
        ax=ax[panel_no],
        title='{}:{}'.format(site[0], site[1]),
        **panel_kwargs
    )

ax[0].set_ylabel(r'$\nu_t$')
# Extra bottom margin for the figure.
plt.gcf().subplots_adjust(bottom=0.2)
pplt.savefig('trajectoryReal', dpi)
plt.show()
示例#2
0
# Exploratory session: genome scans of the scores in `s` / `scores`, followed by
# a likelihood profile for one site.
# NOTE(review): `s`, `scores`, `y`, `cdAll`, `dta`, `mkv`, `S` and `res` are not
# defined in this snippet; they must already exist in the session.
# NOTE(review): `reload` is a builtin on Python 2 only; on Python 3 it has to be
# imported from `importlib`.
pplt.GenomeChromosomewise(utl.scanGenome(utl.zpvalgenome(s)))

# Results of the two scan helpers side by side, as columns 'win' and 'snp'.
scan = pd.concat(
    [
        utl.scanGenome(utl.zpvalgenome(s)).rename('win'),
        utl.scanGenomeSNP(utl.zpvalgenome(s)).rename('snp'),
    ],
    axis=1,  # keyword form: pandas >= 2.0 rejects a positional `axis`
)
pplt.Manhattan(scan)
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(utl.zpvalgenome(s))))
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(scores.abs())))
reload(utl)
pplt.GenomeChromosomewise(utl.scanGenomeSNP(utl.zpvalgenome2tail(s)))
scores.sort_values()
# Second argument: sum of the values at or above the median of whatever
# scanGenomeSNP passes in.
pplt.GenomeChromosomewise(
    utl.scanGenomeSNP(scores.abs(), lambda x: x[x >= x.quantile(0.5)].sum())
)

# Values and their ranks under column keys 'val' / 'rank', sorted by ('val', 's').
df = pd.concat([scores, s], axis=1)
df = pd.concat([df, df.rank()], axis=1, keys=['val', 'rank']).sort_values(('val', 's'))
dfy = pd.concat([df, y], axis=1).dropna()
dfy.sort_values(0)

# Inspect the last row of the sorted table.
i = df.index[-1]
cdi = cdAll.loc[i]
# Parenthesised single-argument print: same output on Python 2, valid syntax on Python 3.
print(cdi.unstack('REP'))
pplt.plotSiteReal(cdi)
cdiun = cdi.unstack('REP')
CD, E = dta.precomputeCDandEmissionsFor(pd.DataFrame(cdi).T)
h = 0.5
reload(mkv)

mkv.computeLikelihoodReal((CD, E, 0, 0.5))
# One likelihood evaluation per value in S (third tuple element), stacked with
# S as keys; keep the first and last columns after reset_index, index by
# 'level_0', and take the column labelled 0.
likes = pd.concat(
    map(lambda x: mkv.computeLikelihoodReal((CD, E, x, 0.5)), S),
    keys=S,
).reset_index().iloc[:, [0, -1]].set_index('level_0')[0]
likes[0]

reload(pplt)
# Left panel: likelihood profile; right panel: the site plotted by plotSiteReal.
plt.figure(figsize=(6, 3), dpi=150)
plt.subplot(1, 2, 1)
pd.DataFrame(likes).plot(ax=plt.gca())
plt.subplot(1, 2, 2)
pplt.plotSiteReal(cdi, ax=plt.gca())
print(cdi.unstack('REP'))

# Reduce `res` to a Series: columns 0 and 3 after reset_index, the first
# becoming the index.
res = res.reset_index().iloc[:, [0, 3]]
res = res.set_index(res.columns[0]).iloc[:, 0]

NN = np.arange(100, 1500, 100)
示例#3
0
# Exploratory (notebook-style) session: joins scores with an annotation table
# and draws genome-wide plots through `pplt`.
# NOTE(review): many names used here (`a`, `L17`, `dominace`, `x0`, `x17`, `xt`,
# `cd`, `H`, `C`, `L`, `scan`, `x`, `hutl`) are not defined in this snippet, and
# the statements look as if they were run out of order (`a` is read on the next
# line but only assigned further down) -- confirm before running top to bottom.
reload(pplt)
# Entries of `a` greater than 20, renamed to 'score'.
b=a[a>20].rename('score')#.iloc[:150]
# Annotation table: keep the listed columns, drop duplicate rows, index by (CHROM, POS).
# NOTE(review): absolute, machine-specific path.
ann=pd.read_pickle('/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Dmelanogaster/Hypoxia/pops/all.ANN.df')["ID  Annotation Annotation_Impact       Gene_Name Gene_ID REF Allele REF_flybaseVCF ALT".split()].reset_index().drop_duplicates().set_index(['CHROM','POS'])
ann.loc[('2L',10558452)]
# Inner joins throughout, except `ann`, which uses DataFrame.join's default `how`;
# the result is sorted by column 'L17'.
d=pd.DataFrame(b).join(L17,how='inner').join(dominace,how='inner').join(x0,how='inner').join(x17,how='inner').join(xt,how='inner').join(ann).join(cd,how='inner').sort_values('L17')

# z = exp((H - C) / 10); only values above 2 are plotted.
z=(H-C).apply(lambda x: np.exp(x/10))
pplt.GenomeChromosomewise(z[z>2])
e=pd.DataFrame(z).join(ann).sort_values(0)
# Bare expressions below only display a value in an interactive session.
e
d
d.loc['3R'].loc[5663533]

pplt.Manhattan(L17)
reload(pplt)
# Plot the last row of `d` (after the sort by 'L17' above).
pplt.plotSiteReal(cd.loc[d.index[-1]])
# Last 10 entries of `b` after an ascending sort.
o=b.sort_values().iloc[-10:]
# C / D ratio per level-0 column group of hutl.load()['L'][180], looked up at the same row.
hutl.load()['L'][180].groupby(level=0,axis=1).apply(lambda x: x[x.name].C/x[x.name].D).loc[d.index[-1]]

pplt.GenomeChromosomewise(b,outliers=o)
pplt.Manhattan(L)
# Deep copy of scan.L, restricted to rows with CHROM == '3R', then positional rows 10000..14999.
a=scan.L.copy(True)
a=pd.DataFrame(a[a.index.get_level_values('CHROM')=='3R']).iloc[10000:15000]

X=a.loc['3R']
# `o` is reassigned here; the value computed from `b` above is discarded.
o=utl.localOutliers(scan.L);
# NOTE(review): lowercase `x` is not assigned in this snippet -- possibly meant `X`; confirm.
pplt.Manhattan(x)
reload(utl)
i=X.idxmax()
pad=10000
X.shape
示例#4
0
文件: topSNPs.py 项目: airanmehr/bio
# Statistic from rutl.HstatisticAll applied to each 'h' column group of the scores.
a = rutl.loadAllScores().groupby(level='h', axis=1).apply(rutl.HstatisticAll)
df = pd.read_pickle(utl.outpath + 'real/scores.df')

# Last label after an ascending sort of `lrd` (the maximum when there are no NaNs).
# NOTE(review): this uses `lrd`, while the second panel below uses `lrdiff` -- confirm
# both columns exist in scores.df.
i = df.lrd.sort_values().index[-1]
df.loc[i]

cd = pd.read_pickle(utl.outpath + 'real/CD.F59.df')

import popgen.Plots as pplt
import pylab as plt


def _title_with_name(axis, site, snp_names):
    """Title *axis* with the first two components of *site* and its entry in *snp_names*."""
    label = '{}:{:.0f} ({})'.format(site[0], site[1], snp_names.loc[site])
    axis.set_title(label, fontsize=8)


names = rutl.loadSNPIDs()

# Figure styling: white seaborn theme, serif Computer Modern font, LaTeX text rendering.
sns.set_style(
    "white",
    {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"},
)
mpl.rc('font', family='serif', serif=['Computer Modern'])
mpl.rc('text', usetex=True)
reload(pplt)
f, ax = plt.subplots(1, 2, sharey=True, dpi=300, figsize=(4, 2))

# Left panel: last row after sorting the h == 0.5 column of `a`, drawn with a legend.
i = a[0.5].sort_values().index[-1]
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})
pplt.plotSiteReal(cd.loc[i], ax=ax[0], legend=True)
_title_with_name(ax[0], i, names)

# Right panel: last row after sorting `lrdiff`.
i = df.lrdiff.sort_values().index[-1]
pplt.plotSiteReal(cd.loc[i], ax=ax[1])
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})
_title_with_name(ax[1], i, names)

# Extra bottom margin for the figure, then save and display.
plt.gcf().subplots_adjust(bottom=0.2)
pplt.savefig('topSNPs', 300)
plt.show()