def accuracy(self, plot=True):
    """Compute per-layer accuracy of non-accidental vs. metric distances.

    ``self.dissimilarity()`` is pivoted so that every
    (layer, geon, fno, dimension) combination has one ``'metric'`` and one
    ``'non-accidental'`` distance.  A combination counts as correct when the
    non-accidental distance is strictly larger than the metric one.

    Args:
        plot: When true and ``self.task == 'run'``, the result is handed to
            ``self.plot_single(df, 'acc')``.

    Returns:
        pandas.DataFrame containing at least the columns ``layer``,
        ``accuracy``, ``kind`` (always ``'nap'``), ``iter`` and
        ``bootstrap``.  With ``self.bootstrap`` set there is one row per
        layer and bootstrap iteration; otherwise there is one row per layer
        with ``iter == 0`` and ``bootstrap`` set to NaN (plus the per-layer
        means of the other numeric columns).
    """
    nap = self.dissimilarity()
    acc = nap.pivot_table(values='dist',
                          index=['layer', 'geon', 'fno', 'dimension'],
                          columns='kind').reset_index()
    acc['accuracy'] = acc['non-accidental'] > acc['metric']

    if self.bootstrap:
        dfs = []
        for layer in acc.layer.unique():
            sel = acc[acc.layer == layer]['accuracy']
            pct = stats.bootstrap_resample(sel, ci=None, func=np.mean)
            n_iter = len(pct)
            d = OrderedDict([('kind', ['nap'] * n_iter),
                             ('layer', [layer] * n_iter),
                             # scalar: broadcast to every bootstrap row
                             ('accuracy', sel.mean()),
                             ('iter', list(range(n_iter))),
                             ('bootstrap', pct)])
            dfs.append(pandas.DataFrame.from_dict(d))
        df = pandas.concat(dfs)
    else:
        # numeric_only=True keeps the historical behaviour of silently
        # skipping non-numeric columns; without it pandas >= 2.0 raises a
        # TypeError as soon as one of the index columns holds strings.
        df = acc.groupby('layer').mean(numeric_only=True).reset_index()
        df['kind'] = 'nap'
        df['iter'] = 0
        df['bootstrap'] = np.nan

    if self.task == 'run' and plot:
        self.plot_single(df, 'acc')
    return df
def _corr_all_orig(self, pref): df = [] for dim in self.myexp.dims: dim_data = load(pref='dis', exp=self.myexp.exp, suffix=dim)[dim] if dim_data.ndim == 3: dim_data = np.mean(dim_data, axis=0) for depth, model_name in self.myexp.models: self.myexp.set_model(model_name) dis = self.myexp.dissimilarity() layer = dis.keys()[-1] dis = dis[layer] corr = stats.corr(dis, dim_data, sel='upper') if self.myexp.bootstrap: print('bootstrapping stats...') bf = stats.bootstrap_resample( dis, dim_data, func=stats.corr, ci=None, seed=0, sel='upper', struct=self.dims[dim].ravel()) for i, b in enumerate(bf): df.append([dim, depth, model_name, layer, corr, i, b]) else: df.append([dim, depth, model_name, layer, corr, 0, np.nan]) df = pandas.DataFrame(df, columns=[ 'kind', 'depth', 'models', 'layer', 'correlation', 'iter', 'bootstrap' ]) self.save(df, pref=pref) return df
def accuracy(self, plot=True):
    """Return the share of cases where non-accidental beats metric distance.

    The long-format table from ``self.dissimilarity()`` is pivoted into one
    row per (layer, geon, fno, dimension) with a ``'metric'`` and a
    ``'non-accidental'`` column; a row is scored as accurate when
    ``non-accidental > metric``.

    Args:
        plot: If true and ``self.task == 'run'``, call
            ``self.plot_single(df, 'acc')`` on the result.

    Returns:
        pandas.DataFrame with (at least) ``layer``, ``accuracy``, ``kind``
        (constant ``'nap'``), ``iter`` and ``bootstrap``.  When
        ``self.bootstrap`` is set each layer contributes one row per
        bootstrap value; otherwise each layer contributes a single row with
        ``iter == 0`` and a NaN ``bootstrap`` (alongside the per-layer means
        of the remaining numeric columns).
    """
    nap = self.dissimilarity()
    acc = nap.pivot_table(
        values='dist',
        index=['layer', 'geon', 'fno', 'dimension'],
        columns='kind',
    ).reset_index()
    acc['accuracy'] = acc['non-accidental'] > acc['metric']

    if self.bootstrap:
        dfs = []
        for layer in acc.layer.unique():
            sel = acc[acc.layer == layer]['accuracy']
            pct = stats.bootstrap_resample(sel, ci=None, func=np.mean)
            count = len(pct)
            d = OrderedDict([
                ('kind', ['nap'] * count),
                ('layer', [layer] * count),
                ('accuracy', sel.mean()),  # scalar, repeated on every row
                ('iter', list(range(count))),
                ('bootstrap', pct),
            ])
            dfs.append(pandas.DataFrame.from_dict(d))
        df = pandas.concat(dfs)
    else:
        # Restrict the mean to numeric columns explicitly: pandas >= 2.0 no
        # longer drops string columns on its own and raises TypeError.
        df = acc.groupby('layer').mean(numeric_only=True).reset_index()
        df['kind'] = 'nap'
        df['iter'] = 0
        df['bootstrap'] = np.nan

    if self.task == 'run' and plot:
        self.plot_single(df, 'acc')
    return df
def compare(self, pref, ylim=(-0.1, 1)):
    """Plot the cross-model comparison for the analysis named ``pref``.

    A banner with ``pref`` is printed, the data is fetched with
    ``self.get_data_all(pref, kind="compare")`` and passed to
    ``self.plot_all``:

    * ``"dis_group_diff"``: one plot of the ``preference`` column, renamed
      to ``"preference for perceived shape"``.
    * ``"pred_corr"``: one ``"correlation"`` plot split by ``dataset``.
    * anything else: one plot per dimension in ``self.myexp.dims``, using
      the rows whose ``kind`` equals that dimension.

    When ``self.myexp.bootstrap`` is set, ``self.bootstrap_ttest_grouped``
    is run on the data and, if ``self.myexp.html`` is not None, its result
    is written there as a table.

    Args:
        pref: Name of the analysis to compare.
        ylim: y-axis limits forwarded to ``plot_all``.  The default is an
            immutable tuple and is converted to a fresh list per call, so a
            callee that edits the limits cannot alter later calls.
    """
    print()
    print("{:=^50}".format(" " + pref + " "))

    if isinstance(ylim, tuple):
        # plot_all historically received a list; give it a private copy.
        ylim = list(ylim)

    df = self.get_data_all(pref, kind="compare")

    behav = self.myexp.behav() if hasattr(self.myexp, "behav") else None
    if behav is not None:
        rels = {"shape": stats.bootstrap_resample(behav.dissimilarity, func=np.mean)}
    else:
        rels = None

    if pref == "dis_group_diff":
        values = "preference for perceived shape"
        df = df.rename(columns={"preference": values})
        self.plot_all(
            df, values, "diff", pref=pref, ceiling=None,
            color=self.myexp.colors["shape"], ylim=ylim,
        )
    elif pref == "pred_corr":
        values = "correlation"
        df["kind"] = "shape"
        # NOTE(review): this pivot is computed but never used below; it
        # looks like a leftover of a disabled per-dataset reliability
        # ceiling.  Kept so that the behav() call still happens.
        behav = self.myexp.behav()
        behav = behav.pivot_table(index=["kind", "subjid"], columns="no", values="acc")
        self.plot_all(
            df, values, "consistency", col="dataset", pref=pref, ceiling=None,
            color=self.myexp.colors["shape"], ylim=ylim,
        )
    else:
        if self.myexp.exp == "fonts":
            values = "clustering accuracy"
            df = df.rename(columns={"dissimilarity": values})
        else:
            values = "accuracy"
        for dim in self.myexp.dims:
            ceiling = None if rels is None else rels[dim]
            self.plot_all(
                df[df.kind == dim], values, dim, pref=pref, ceiling=ceiling,
                color=self.myexp.colors[dim], ylim=ylim,
            )

    if self.myexp.bootstrap:
        # Single guard: bf is always bound before the table is written.
        bf = self.bootstrap_ttest_grouped(df)
        if self.myexp.html is not None:
            self.myexp.html.writetable(bf, caption="bootstrapped t-test (one-tailed, rel. samples)")
def corr(self):
    """Correlate each layer's dissimilarities with every dimension.

    For each ``dim`` in ``self.dims`` the stored dissimilarity data is
    loaded; when nothing is stored it is computed by temporarily switching
    the model to ``dim``.  3-d dimension data is averaged over its first
    axis.  Every entry of ``self.dissimilarity()`` is then correlated with
    the dimension data through ``stats.corr(..., sel='upper')``.

    The result is saved with ``self.save(df, pref='corr')`` and, when
    ``self.task == 'run'``, plotted with ``self.plot_single(df, 'corr')``.

    Returns:
        pandas.DataFrame with the columns ``kind``, ``models``, ``layer``,
        ``correlation``, ``iter`` and ``bootstrap``.  Without bootstrapping
        there is one row per dimension/layer with ``iter == 0`` and
        ``bootstrap`` NaN; with it, one row per bootstrap value.

    Raises:
        Exception: If the data for a dimension can neither be loaded nor
            computed.
    """
    dis = self.dissimilarity()
    df = []
    nname = models.NICE_NAMES[self.model_name].lower()

    for dim in self.dims:
        dim_data = load(pref='dis', exp=self.exp, suffix=dim)
        if dim_data is None:
            name = self.model_name
            try:
                self.set_model(dim)
                dim_data = self.dissimilarity()
            finally:
                # Put the original model back even if the computation
                # above failed, so the object is not left switched over.
                self.set_model(name)
            if dim_data is None:
                raise Exception('dimension data %s cannot be obtained' % dim)

        dim_data = dim_data[dim]
        if dim_data.ndim == 3:
            dim_data = np.mean(dim_data, axis=0)

        struct = self.dims[dim] if self.exp in ['fonts', 'stefania'] else None
        if self.filter:
            dim_data = dim_data[self.sel][:, self.sel]
            struct = None

        for layer, data in dis.items():
            d = data[self.sel][:, self.sel] if self.filter else data
            corr = stats.corr(d, dim_data, sel='upper')
            if self.bootstrap:
                print('bootstrapping stats...')
                bf = stats.bootstrap_resample(d, dim_data, func=stats.corr,
                                              ci=None, seed=0, sel='upper',
                                              struct=struct)
                for i, b in enumerate(bf):
                    df.append([dim, nname, layer, corr, i, b])
            else:
                df.append([dim, nname, layer, corr, 0, np.nan])

    df = pandas.DataFrame(df, columns=[
        'kind', 'models', 'layer', 'correlation', 'iter', 'bootstrap'
    ])
    self.save(df, pref='corr')
    if self.task == 'run':
        self.plot_single(df, 'corr')
    return df
def corr(self):
    """Compute layer-by-dimension dissimilarity correlations.

    Each ``dim`` in ``self.dims`` gets its dissimilarity data from ``load``;
    if that returns None the data is computed by temporarily setting the
    model to ``dim``.  3-d data is reduced with a mean over axis 0.  Each
    ``(layer, data)`` item of ``self.dissimilarity()`` is correlated with it
    using ``stats.corr(..., sel="upper")``.  With ``self.filter`` set, both
    matrices are first restricted to ``self.sel`` on rows and columns.

    Side effects: the table is stored via ``self.save(df, pref="corr")`` and
    plotted via ``self.plot_single(df, "corr")`` when ``self.task == "run"``.

    Returns:
        pandas.DataFrame with columns ``kind``, ``models``, ``layer``,
        ``correlation``, ``iter``, ``bootstrap``; one row per bootstrap
        value when ``self.bootstrap`` is set, otherwise one row per
        dimension/layer with ``iter == 0`` and ``bootstrap`` NaN.

    Raises:
        Exception: When dimension data is unavailable from both sources.
    """
    dis = self.dissimilarity()
    records = []
    nname = models.NICE_NAMES[self.model_name].lower()

    for dim in self.dims:
        dim_data = load(pref="dis", exp=self.exp, suffix=dim)
        if dim_data is None:
            name = self.model_name
            try:
                self.set_model(dim)
                dim_data = self.dissimilarity()
            finally:
                # Restore the caller's model on success *and* on failure.
                self.set_model(name)
            if dim_data is None:
                raise Exception("dimension data %s cannot be obtained" % dim)

        dim_data = dim_data[dim]
        if dim_data.ndim == 3:
            dim_data = np.mean(dim_data, axis=0)

        struct = self.dims[dim] if self.exp in ["fonts", "stefania"] else None
        if self.filter:
            dim_data = dim_data[self.sel][:, self.sel]
            struct = None

        for layer, data in dis.items():
            d = data[self.sel][:, self.sel] if self.filter else data
            corr = stats.corr(d, dim_data, sel="upper")
            if self.bootstrap:
                print("bootstrapping stats...")
                bf = stats.bootstrap_resample(
                    d, dim_data, func=stats.corr, ci=None, seed=0, sel="upper", struct=struct
                )
                for i, b in enumerate(bf):
                    records.append([dim, nname, layer, corr, i, b])
            else:
                records.append([dim, nname, layer, corr, 0, np.nan])

    df = pandas.DataFrame(
        records, columns=["kind", "models", "layer", "correlation", "iter", "bootstrap"]
    )
    self.save(df, pref="corr")
    if self.task == "run":
        self.plot_single(df, "corr")
    return df
def _corr_all_orig(self, pref): df = [] for dim in self.myexp.dims: dim_data = load(pref="dis", exp=self.myexp.exp, suffix=dim)[dim] if dim_data.ndim == 3: dim_data = np.mean(dim_data, axis=0) for depth, model_name in self.myexp.models: self.myexp.set_model(model_name) dis = self.myexp.dissimilarity() layer = dis.keys()[-1] dis = dis[layer] corr = stats.corr(dis, dim_data, sel="upper") if self.myexp.bootstrap: print("bootstrapping stats...") bf = stats.bootstrap_resample( dis, dim_data, func=stats.corr, ci=None, seed=0, sel="upper", struct=self.dims[dim].ravel() ) for i, b in enumerate(bf): df.append([dim, depth, model_name, layer, corr, i, b]) else: df.append([dim, depth, model_name, layer, corr, 0, np.nan]) df = pandas.DataFrame(df, columns=["kind", "depth", "models", "layer", "correlation", "iter", "bootstrap"]) self.save(df, pref=pref) return df
def bootstrap_resample(r):
    """Bootstrap ``r`` unless bootstrapping is switched off.

    ``n_boot`` and ``ci`` are read from the surrounding scope.  When
    ``n_boot`` is ``None`` or equal to zero a ``(nan, nan)`` pair is
    returned; otherwise the call is delegated to
    ``stats.bootstrap_resample(r, ci=ci, niter=n_boot)``.
    """
    disabled = n_boot is None or n_boot == 0
    if disabled:
        return (np.nan, np.nan)
    return stats.bootstrap_resample(r, ci=ci, niter=n_boot)
def compare(self, pref, ylim=(-.1, 1)):
    """Plot the cross-model comparison for the analysis named ``pref``.

    Prints a banner containing ``pref``, fetches the data through
    ``self.get_data_all(pref, kind='compare')`` and draws it with
    ``self.plot_all``:

    * ``'dis_group_diff'``: a single plot of the ``preference`` column,
      renamed to ``'preference for perceived shape'``.
    * ``'pred_corr'``: a single ``'correlation'`` plot split by ``dataset``.
    * otherwise: one plot per dimension in ``self.myexp.dims`` using the
      rows whose ``kind`` matches the dimension.

    If ``self.myexp.bootstrap`` is set, ``self.bootstrap_ttest_grouped`` is
    run on the data and its result is written to ``self.myexp.html`` (when
    that is not None).

    Args:
        pref: Name of the analysis to compare.
        ylim: y-axis limits forwarded to ``plot_all``.  The default is an
            immutable tuple that is turned into a new list on every call,
            so edits made by a callee never leak into later calls.
    """
    print()
    print('{:=^50}'.format(' ' + pref + ' '))

    if isinstance(ylim, tuple):
        # plot_all used to receive a list; keep that, but never a shared one.
        ylim = list(ylim)

    df = self.get_data_all(pref, kind='compare')

    if hasattr(self.myexp, 'behav'):
        behav = self.myexp.behav()
    else:
        behav = None
    if behav is not None:
        rels = {
            'shape': stats.bootstrap_resample(behav.dissimilarity, func=np.mean)
        }
    else:
        rels = None

    if pref == 'dis_group_diff':
        values = 'preference for perceived shape'
        df = df.rename(columns={'preference': values})
        self.plot_all(df, values, 'diff', pref=pref, ceiling=None,
                      color=self.myexp.colors['shape'], ylim=ylim)
    elif pref == 'pred_corr':
        values = 'correlation'
        df['kind'] = 'shape'
        # NOTE(review): the pivoted table is not used afterwards; it appears
        # to be left over from a disabled per-dataset reliability ceiling.
        # The behav() call is kept so the executed steps stay the same.
        behav = self.myexp.behav()
        behav = behav.pivot_table(index=['kind', 'subjid'],
                                  columns='no', values='acc')
        self.plot_all(df, values, 'consistency', col='dataset', pref=pref,
                      ceiling=None, color=self.myexp.colors['shape'],
                      ylim=ylim)
    else:
        if self.myexp.exp == 'fonts':
            values = 'clustering accuracy'
            df = df.rename(columns={'dissimilarity': values})
        else:
            values = 'accuracy'
        for dim in self.myexp.dims:
            ceiling = None if rels is None else rels[dim]
            self.plot_all(df[df.kind == dim], values, dim, pref=pref,
                          ceiling=ceiling, color=self.myexp.colors[dim],
                          ylim=ylim)

    if self.myexp.bootstrap:
        # One guard for both steps, so bf is defined whenever it is written.
        bf = self.bootstrap_ttest_grouped(df)
        if self.myexp.html is not None:
            self.myexp.html.writetable(
                bf, caption='bootstrapped t-test (one-tailed, rel. samples)'
            )