def saveTopKSNPs():
    """Write the annotated SNPs above the 0.9999 score quantile to a CSV file.

    The scores from ``rutl.loadScores()`` are combined column-wise with the
    SNP IDs from ``rutl.loadSNPIDs()``; column ``0`` of that table is taken
    as the statistic and named ``Hstatistic``. Values strictly greater than
    the 0.9999 quantile are kept, inner-joined with ``rutl.getNut(0)``,
    joined with four annotation columns from ``loadANN()``, de-duplicated
    and sorted by ``Hstatistic`` in descending order. Rows whose
    ``Annotation_Impact`` equals ``'LOW'`` are removed before the table is
    written to ``utl.outpath + 'real/top_1e-4_quantile_SNPs.csv'``.

    Returns:
        None. The only output is the CSV file.
    """
    raw_scores = rutl.loadScores()

    # Only these annotation columns are carried into the output table.
    annotation_columns = "Annotation Annotation_Impact Gene_Name Gene_ID".split()
    annotations = loadANN()[annotation_columns]

    # Attach the SNP IDs, push 'ID' into the index, and keep column 0 as the
    # statistic under the name 'Hstatistic'.
    scores_with_ids = pd.concat([raw_scores, rutl.loadSNPIDs()], axis=1)
    h_statistic = scores_with_ids.set_index('ID', append=True)[0].rename('Hstatistic')

    # Keep only the values strictly above the 0.9999 quantile.
    cutoff = h_statistic.quantile(0.9999)
    above_cutoff = h_statistic[h_statistic > cutoff].reset_index('ID')

    # Inner join: rows without a match in rutl.getNut(0) are dropped.
    candidates = above_cutoff.join(rutl.getNut(0), how='inner')

    ranked = (
        candidates.join(annotations)
        .drop_duplicates()
        .sort_values('Hstatistic', ascending=False)
    )

    # Discard rows annotated with 'LOW' impact.
    not_low_impact = ranked['Annotation_Impact'] != 'LOW'
    ranked[not_low_impact].to_csv(utl.outpath + 'real/top_1e-4_quantile_SNPs.csv')
# Exploratory script: loads score tables and per-site data from
# utl.outpath + 'real/', then draws a two-panel figure of two selected sites.
# NOTE(review): `os`, `pd`, `sns`, `mpl` and `reload` are used below but not
# imported in this section — presumably imported earlier in the file; confirm.

# Home directory with a trailing slash.
home = os.path.expanduser('~') + '/'
import popgen.Util as utl
import popgen.Estimate as est
import popgen.Run.TimeSeries.RealData.Utils as rutl

# Apply rutl.HstatisticAll to each group of columns sharing the same value
# of the 'h' column level.
a = rutl.loadAllScores().groupby(level='h', axis=1).apply(rutl.HstatisticAll)
df = pd.read_pickle(utl.outpath + 'real/scores.df')

# Index label of the last entry after an ascending sort of `lrd`, i.e. the
# largest value (pandas places NaNs last by default, so this assumes `lrd`
# has no NaNs — TODO confirm).
i = df.lrd.sort_values().index[-1]
# Bare expression: only displays something in an interactive session; in a
# script the result is discarded.
df.loc[i]
cd = pd.read_pickle(utl.outpath + 'real/CD.F59.df')
import popgen.Plots as pplt
import pylab as plt
names = rutl.loadSNPIDs()

# Global plot styling: seaborn "white" style with overrides, serif font and
# LaTeX text rendering via matplotlib rc settings.
sns.set_style("white", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"})
mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']}); mpl.rc('text', usetex=True)
# Re-import the plotting module so edits to it are picked up.
reload(pplt)

# One row, two panels sharing the y axis.
f, ax = plt.subplots(1, 2, sharey=True, dpi=300, figsize=(4, 2))

# Left panel: site whose entry sorts last (ascending) in the 0.5 column of `a`.
i = a[0.5].sort_values().index[-1]
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})
pplt.plotSiteReal(cd.loc[i], ax=ax[0], legend=True)
# The title uses the first two elements of the index label plus the SNP name;
# presumably the label is (chromosome, position) — verify against the index.
ax[0].set_title('{}:{:.0f} ({})'.format(i[0], i[1], names.loc[i]), fontsize=8)

# Right panel: site whose `lrdiff` entry sorts last (ascending); no legend
# and no title are set for this panel.
i = df.lrdiff.sort_values().index[-1]
pplt.plotSiteReal(cd.loc[i], ax=ax[1])
# Same context settings as above, applied again after the last plot call.
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})