def findErrorBarsBindingCurve(subSeries):
    """Return bootstrapped confidence intervals on columns of an input data matrix.

    Rows are assumed to represent replicate measurements, i.e. clusters.

    Parameters
    ----------
    subSeries : pandas.DataFrame
        Data matrix; each column is bootstrapped independently (NaNs dropped).

    Returns
    -------
    (eminus, eplus) : tuple of pandas.Series
        Lower and upper error-bar lengths relative to each column's median,
        indexed like ``subSeries.columns``. NaN where the CI could not be
        computed (fewer than two values, or the bootstrap raised IndexError).
    """
    eminus = []
    eplus = []
    for col in subSeries:
        vec = subSeries.loc[:, col].dropna()
        bounds = None
        # Need at least two replicate values to bootstrap; bootstrap.ci can
        # raise IndexError on degenerate input, which we treat as "no CI".
        if len(vec) > 1:
            try:
                bounds = bootstrap.ci(vec, np.median, n_samples=1000)
            except IndexError:
                bounds = None
        if bounds is None:
            eminus.append(np.nan)
            eplus.append(np.nan)
        else:
            med = vec.median()  # compute once; original called it twice
            eminus.append(med - bounds[0])
            eplus.append(bounds[1] - med)
    eminus = pd.Series(eminus, index=subSeries.columns)
    eplus = pd.Series(eplus, index=subSeries.columns)
    return eminus, eplus
def returnFractionGroupedBy(mat, param_in, param_out):
    """Given data matrix, return mean of param_out binned by param_in.

    Returns
    -------
    x : list of bin labels (the group keys)
    y : pandas.Series of per-bin means of ``param_out``
    yerr : 2 x len(x) array of |bootstrap CI bound - mean| per bin,
        suitable for matplotlib-style asymmetric error bars.
    """
    grouped = mat.groupby(param_in)[param_out]
    y = grouped.mean()
    x = y.index.tolist()
    with warnings.catch_warnings():
        # The percentile-interval bootstrap on small groups emits warnings
        # we deliberately silence here.
        warnings.simplefilter("ignore")
        deviations = []
        for name, group in grouped:
            ci = bootstrap.ci(group, method='pi', n_samples=100)
            deviations.append(np.abs(ci - y.loc[name]))
        yerr = np.array(deviations).transpose()
    return x, y, yerr
def findAlpha(self, n):
    """Estimate the photobleach fraction per image and plot its distribution.

    For each pair (x, y) with both values positive, takes the n-th root of
    y/x; the median of these per-item ratios is returned as alpha, together
    with a 1000-sample bootstrap confidence interval on the median. Also
    draws a histogram/KDE of the ratios with the median (solid line) and CI
    bounds (dotted lines) marked.

    NOTE(review): assumes self.x and self.y are pandas-like series of paired
    intensities (``.loc`` indexing, ``.median()``) -- confirm against the
    class definition, which is outside this view.
    """
    x = self.x
    y = self.y
    # Only pairs where both measurements are positive give a meaningful ratio.
    index = (x>0)&(y>0)
    # Equivalent log-space formulation, kept for reference:
    #vec = np.exp(np.log(y/x)/n).loc[index]
    vec = np.power(y/x, 1/float(n)).loc[index]
    alpha = vec.median()
    # Bootstrap CI on the median of the per-image ratios.
    lb, ub = bootstrap.ci(vec, statfunction=np.median, n_samples=1000)
    xlim = [0.9 ,1.1]
    bins = np.arange(0.898, 1.1, 0.001)
    # Plot the ratio distribution with the estimate and its CI marked.
    plt.figure(figsize=(4,3));
    sns.distplot(vec, bins=bins, hist_kws={'histtype':'stepfilled'}, kde_kws={'clip':xlim});
    plt.axvline(alpha, color='k', linewidth=0.5);
    plt.axvline(lb, color='k', linestyle=':', linewidth=0.5);
    plt.axvline(ub, color='k', linestyle=':', linewidth=0.5);
    plt.xlabel('photobleach fraction per image');
    plt.ylabel('probability');
    plt.xlim(xlim)
    fix_axes(plt.gca());
    plt.tight_layout()
    return alpha, lb, ub
'substrate']].dropna() merge.rename(columns={'substrate_x': 'substrate'}, inplace=True) del df1, df2, df3 grouped = merge.groupby('substrate') sand = grouped.get_group('sand') gravel = grouped.get_group('gravel') boulders = grouped.get_group('boulders') del merge print 'Calculating calibrations metrics for lsq classifications...' calib_df = pd.DataFrame(columns=['ent', 'h**o', 'var'], index=['sand', 'gravel', 'boulders']) calib_df.loc['sand'] = pd.Series({ 'h**o': 1 - np.average(boot.ci(sand['homo_median'], np.median, alpha=0.05)), 'ent': np.average(boot.ci(sand['entropy_median'], np.median, alpha=0.05)), 'var': np.average(boot.ci(sand['var_median'], np.median, alpha=0.05)) }) calib_df.loc['gravel'] = pd.Series({ 'h**o': 1 - np.average(boot.ci(gravel['homo_median'], np.median, alpha=0.05)), 'ent': np.average(boot.ci(gravel['entropy_median'], np.median, alpha=0.05)), 'var': np.average(boot.ci(gravel['var_median'], np.median, alpha=0.05)) }) calib_df.loc['boulders'] = pd.Series({ 'h**o':
'std': 'var_std' }, inplace=True) merge = df1.merge(df2, left_index=True, right_index=True, how='left') merge = merge.merge(df3, left_index=True, right_index=True, how='left') merge = merge[['homo_median', 'entropy_median', 'var_median', 'substrate']].dropna() merge.rename(columns={'substrate_x': 'substrate'}, inplace=True) del df1, df2, df3 grouped = merge.groupby('substrate') sand = grouped.get_group('sand') gravel = grouped.get_group('gravel') boulders = grouped.get_group('boulders') boot.ci(sand['homo_median'], np.median, alpha=0.05) boot.ci(sand['entropy_median'], np.median, alpha=0.05) boot.ci(sand['var_median'], np.median, alpha=0.05) boot.ci(gravel['homo_median'], np.median, alpha=0.05) boot.ci(gravel['entropy_median'], np.median, alpha=0.05) boot.ci(gravel['var_median'], np.median, alpha=0.05) boot.ci(boulders['homo_median'], np.median, alpha=0.05) boot.ci(boulders['entropy_median'], np.median, alpha=0.05) boot.ci(boulders['var_median'], np.median, alpha=0.05) homo_df = pd.DataFrame(index=['Sand', 'Gravel', 'Boulders'], columns=['lbound', 'ubound']) homo_df.loc['Sand'] = pd.Series({ 'lbound':
def get_final_ci(series):
    """Summarize a series: mean, sample std, and a 1000-sample bootstrap CI
    (using the bootstrap library's default statistic).

    Returns
    -------
    (mean, std, low_ci, high_ci)
    """
    low_ci, high_ci = bootstrap.ci(series, n_samples=1000)
    return series.mean(), series.std(ddof=1), low_ci, high_ci