def plotPowerCLR(recompute=False):
    """Plot the power of the HMM and MarkovChain statistics for two sweep types.

    The power table is cached in ``ROC/PowerCLR.df`` under ``utl.outpath``.

    :param recompute: when true, rebuild the table from the pickled
        ``ROC/MarkovChain`` and ``ROC/HMM`` results (dropping rows whose
        ``coverage`` level is infinite) and overwrite the cache; otherwise
        the cached table is loaded.

    The figure is a 2x3 grid: the first row shows ``nu0 == 0.005`` (y-label
    'Hard'), the second row ``nu0 == 0.1`` (y-label 'Soft').  It is saved
    through ``pplt.savefig`` under the name 'powerCLR' and then shown.
    """
    cache_path = utl.outpath + 'ROC/PowerCLR.df'
    if not recompute:
        df = pd.read_pickle(cache_path)
    else:
        markov_scores = pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'MarkovChain'))
        hmm_scores = f(pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'HMM')))
        scores = pd.concat([markov_scores, hmm_scores])
        print(scores)
        finite_coverage = scores.index.get_level_values('coverage') != np.inf
        scores = scores[finite_coverage]
        # Per group (first six index levels): mean of the values at or above
        # the quantile that Qcoverage assigns to the group's first key.
        tail_means = scores.groupby(level=range(6)).apply(
            lambda grp: grp[grp >= grp.quantile(Qcoverage[grp.name[0]])].mean())
        df = pd.DataFrame(tail_means)[0]
        df = getPower(df, groupbyLevels=range(4))
        df.to_pickle(cache_path)

    reload(pplt)
    info = pplt.getNameColorMarker(df)
    method_level = info.index.get_level_values('method')
    # Fixed line style and colour per method.
    for method, marker, color in (('HMM', '--o', 'r'),
                                  ('MarkovChain', '--s', 'darkblue')):
        rows = method_level == method
        info.loc[rows, 'marker'] = marker
        info.loc[rows, 'color'] = color

    # NOTE(review): `dpi` is not assigned in this function; it is resolved
    # from module scope -- confirm it is defined before this is called.
    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=dpi)
    pplt.setStyle(lw=1)
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard')
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft')
    # Panel letters (A)..(F), row by row.
    for letter, ax in zip('ABCDEF', axes.reshape(-1)):
        pplt.annotate('({})'.format(letter), ax=ax, fontsize=7)
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('powerCLR', dpi=dpi)
    plt.show()
def plotPower(recompute=False):
    """Plot the power of GP, HMM, FIT and CMH and export average-power tables.

    The power table is cached in ``ROC/Power.df`` under ``utl.outpath``.

    :param recompute: when true, rebuild the table from the pickled
        ``ROC/FIT``, ``ROC/CMH`` and ``ROC/GP`` results plus
        ``loadHMMAllDepths()`` and overwrite the cache; otherwise load it.

    Side effects:
      * saves a 2x3 figure through ``pplt.savefig`` under the name 'power'
        (first row ``nu0 == 0.005`` labelled 'Hard', second row
        ``nu0 == 0.1`` labelled 'Soft') and shows it;
      * pickles the mean over the first three index levels, unstacked by
        'method', to ``ROC/avgPower.df``;
      * writes two LaTeX tables (hard / soft sweeps) through
        ``utl.DataframetolaTexTable``.
    """
    if recompute:
        # Keep rows that are flagged causal or carry label -1.
        causal = lambda x: x[(x.index.get_level_values('causal') == True) |
                             (x.index.get_level_values('label') == -1)]
        FIT = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT']
        # Missing FIT scores are replaced by uniform random draws in [0, 1).
        FIT[FIT.isnull()] = np.random.rand(FIT.isnull().sum())
        CMH = causal(pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH'].fillna(0))
        GP = causal(pd.read_pickle(utl.outpath + 'ROC/GP').LR)
        HMM = f(loadHMMAllDepths())
        group_levels = list(range(6))
        # HMM: mean of the values at or above the quantile that Qcoverage
        # assigns to the group's first key; the other methods use the max.
        HMM = HMM.groupby(level=group_levels).apply(
            lambda x: x[x >= x.quantile(Qcoverage[x.name[0]])].mean())
        GP = GP.groupby(level=group_levels).max()
        FIT = FIT.groupby(level=group_levels).max()  # dont move this line!
        CMH = CMH.groupby(level=group_levels).max()
        df = getPower(pd.concat([GP, HMM, FIT, CMH]), list(range(4))).sort_index()
        df.to_pickle(utl.outpath + 'ROC/Power.df')
    else:
        df = pd.read_pickle(utl.outpath + 'ROC/Power.df')

    # NOTE(review): the source layout was flattened; this filter is assumed to
    # apply to both branches (the figure has three columns) -- confirm.
    df = df[df.index.get_level_values('coverage') != np.inf]
    df = fixComaleName(df)
    info = fixColor(pplt.getNameColorMarker(df))

    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=pplt.PLOS.dpi)
    pplt.setStyle(lw=1)
    reload(pplt)
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard',
                      panel=list('ABC'))
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft', panel=list('DEF'))
    for letter, ax in zip('ABCDEF', axes.reshape(-1)):
        pplt.annotate('({})'.format(letter), ax=ax, fontsize=7)
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('power', pplt.PLOS.dpi)

    # Average over the first three index levels once; reused for the pickle
    # and for the LaTeX tables.
    avg_power = df.groupby(level=list(range(3))).mean()
    avg_power.unstack('method').to_pickle(utl.outpath + 'ROC/avgPower.df')

    table = avg_power.reset_index()
    table = table.replace({np.inf: r'$\infty$'})
    # Assign the column back explicitly: an in-place replace through
    # `table.nu0` only works when pandas returns a view of the column.
    table['nu0'] = table['nu0'].replace({0.005: 'Hard', 0.1: 'Soft'})
    table.columns = [r'$\lambda$', 'Sweep', 'Method', 'Avg Power']
    table = table.sort_values([r'$\lambda$', 'Sweep', 'Avg Power'], ascending=False)
    table['Avg Power'] = table['Avg Power'].round().astype(int)
    table = table.set_index(['Sweep'])
    # Cast the numeric lambda entries to int, leaving string entries untouched.
    numeric = table[r'$\lambda$'].apply(lambda x: not isinstance(x, str))
    table.loc[numeric, r'$\lambda$'] = table.loc[numeric, r'$\lambda$'].astype(int)
    soft = table.loc['Soft'].sort_values([r'$\lambda$', 'Avg Power'], ascending=False)
    hard = table.loc['Hard'].sort_values([r'$\lambda$', 'Avg Power'], ascending=False)
    # NOTE(review): 'powerHardMathods.tex' looks like a typo for 'Methods';
    # the path is kept as-is because other files may reference it.
    utl.DataframetolaTexTable(hard, fname=utl.paperFiguresPath + '../tables/powerHardMathods.tex')
    utl.DataframetolaTexTable(soft, fname=utl.paperFiguresPath + '../tables/powerSoftMethods.tex')
    plt.show()
def plotRank():
    """Plot the CDF of the causal site's rank for each method.

    Loads the pickled rank tables ``ROC/ranks.{CMH,HMM,GP}.df`` under
    ``utl.outpath`` and, for every value of the first index level (called
    ``depth`` here), draws a 2x4 grid of CDF curves (one panel per
    ``nu0`` / ``s`` pair, one curve per method), prints a table of the
    per-panel AUC values, and saves the figure through ``pplt.savefig``
    under the name ``'rank<depth>'``.

    NOTE(review): the nested ``computeRanks`` helper only writes the GP and
    CMH rank files and its call is disabled, so all three pickles (including
    ``ranks.HMM.df``) must already exist on disk -- confirm.
    """

    def computeRanks():
        """Rank GP and CMH scores and pickle the ranks of the causal rows."""
        print('ranking...')
        # Rank within the first five index levels (highest score = rank 1)
        # and keep the rows whose 'causal' level is True.
        ff = lambda x: x.groupby(level=list(range(5))).rank(ascending=False).xs(True, level='causal')
        removeINF = lambda x: x[x.index.get_level_values('coverage') != np.inf]
        positive = lambda x: x.xs(1, level='label').fillna(0)
        # ff(positive( f(pd.read_pickle(utl.outpath + 'ROC/HMM')))).to_pickle('{}ROC/ranks.HMM.df'.format(utl.outpath))
        ff(positive(removeINF(pd.read_pickle(utl.outpath + 'ROC/GP'))).LR).to_pickle(
            '{}ROC/ranks.GP.df'.format(utl.outpath))
        fit = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT']
        fit[fit.isnull()] = np.random.rand(fit.isnull().sum())
        # The next two expressions discard their results (inspection leftovers).
        fit.isnull().sum()
        fit.xs(True, level='causal')
        # ff(fit).to_pickle('{}ROC/ranks.FIT.df'.format(utl.outpath))
        ff(positive(removeINF(pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH']))).to_pickle(
            '{}ROC/ranks.CMH.df'.format(utl.outpath))

    # computeRanks()
    print('plotting...')
    # The load below was commented out, which left `a` unbound in this scope
    # (the nested helper's variable is local to it); restored so the loop
    # below has data to iterate over.
    a = pd.concat([pd.read_pickle('{}ROC/ranks.{}.df'.format(utl.outpath, method))
                   for method in ['CMH', 'HMM', 'GP']])
    fontsize = 7
    dpi = 300

    def addlast(df):
        # Append a point at rank 1200 with value 1 so every CDF ends at 1,
        # then drop the group-key levels from the index.
        df[df.name + (1200,)] = 1
        return df.loc[df.name]

    for depth, _ in a.groupby(level=0):
        print(depth)
        AUC = []
        # Empirical CDF of the rank per group: cumulative counts / group size.
        dists = a.loc[depth].groupby(level=[0, 2, 1]).apply(
            lambda df: df.value_counts().sort_index().cumsum() / df.shape[0])
        dists = dists.groupby(level=list(range(3))).apply(addlast)
        fig, axes = plt.subplots(2, 4, figsize=(7, 3), dpi=dpi, sharey=True, sharex=True)
        axes = axes.reshape(-1)
        j = 0
        for nu0, dfnu in dists.groupby(level=0):
            for s, df in dfnu.loc[nu0].groupby(level=0):
                df = df.loc[s]
                df = df.unstack(level='method')
                # Carry the last CDF value forward over ranks a method never
                # produced; leading gaps become 0.
                df = df.ffill().fillna(0)
                # Rank-weighted average of each method's CDF.
                auc = (df.apply(lambda x: x.dot(df.index.values)) /
                       np.sum(df.index.values)).rename((depth, nu0, s))
                AUC += [auc]
                color = fixColor(pd.DataFrame(None, index=df.columns)).loc[
                    df.columns.values, 'color'].tolist()
                df.columns = [name.replace('HMM', comaleName) for name in df.columns]
                if df.shape[0] == 2:
                    df.index = np.ceil(df.index.values)
                df.plot(color=color, ax=axes[j], lw=1, legend=False)
                axes[j].set_ylim([-0.02, 1.02])
                axes[j].set_title('$s$={}'.format(s), fontsize=fontsize + 1)
                if j > 3:
                    axes[j].set_xlabel('Rank', fontsize=fontsize)
                axes[j].set_ylabel(
                    r'CDF ({} Sweep)'.format('Hard' if nu0 == 0.005 else 'Soft'),
                    fontsize=fontsize)
                axes[j].locator_params(axis='x', nbins=5)
                # Style calls are left inside the loop, in their original
                # position, so each panel is drawn under the same settings
                # as before.
                pplt.setStyle(lw=1, fontsize=fontsize, fontscale=0.1)
                mpl.rcParams.update({'font.size': 2})
                mpl.rc('xtick', labelsize=6)
                mpl.rc('ytick', labelsize=6)
                if j == 7:
                    axes[j].legend(loc='lower right', fontsize=fontsize)
                j += 1
        plt.xlim([0, 1200])
        plt.gcf().subplots_adjust(bottom=0.15)
        print(pd.concat(AUC, axis=1).round(2).T.reset_index())
        print(depth)
        pplt.savefig('rank{}'.format(depth), dpi)
o.iloc[1] = o.iloc[2] # o=(obs[100].value_counts().sort_index()/obs.shape[0]) o.name = 'Observation'; o.plot(color='g'); markov.plot(color='b'); plt.xlim(dfplt.loc[(nu0, tau)].xlim); plt.ylim(dfplt.loc[(nu0, tau)].ylim); plt.locator_params(nbins=3) pplt.annotate(r'$s={}$, $\nu_0=${}, $\tau$={}'.format(s, nu0, tau), loc=1, fontsize=fontsize) plt.xlabel('$s$') pplt.setSize(plt.gca(), fontsize=fontsize) plt.title('({})'.format(subptitle[subp[2] - 1]), fontsize=fontsize) if __name__ == '__main__': # createNeutralSimulations() # createSelectionSimulations(s=0.01) # createSelectionSimulations(s=0.1) reload(pplt) dpi = 200; fig = plt.figure(figsize=(6.2, 4), dpi=dpi); pplt.setStyle(lw=1); fontsize = 7 plotNull(range(1, 4), fontsize=fontsize); plotNull(range(4, 7), 0.1, fontsize=fontsize); plotAlternative(range(7, 10), fontsize=fontsize); plt.tight_layout() pplt.savefig('markovDists', dpi=dpi); plt.gcf().subplots_adjust(bottom=0.1) plt.show() print 'Done'