def _evaluate_composite_score(y_true, y_pred, score_func):
    """Evaluate a composite score based on the contingency table (e.g.
    precision, specificity or sensitivity) and estimate its standard
    error with jackknife (leave-one-out) resampling.

    :param y_true: 1-D array of ground-truth labels.
    :param y_pred: 1-D array of predicted labels, same length as y_true.
    :param score_func: callable(y_true, y_pred) returning a scalar score.
    :return: tuple (score, jackknife standard error).

    References:
        Efron and Stein (1981), "The jackknife estimate of variance."
    """

    def _compute_jackknife_stderr(x):
        # Equivalent to np.sqrt(((n - 1) / n) * np.sum((x - x.mean()) ** 2))
        # because np.std uses the 1/n (population) normalization by default.
        n = x.shape[0]
        return np.sqrt(n - 1) * np.std(x)

    composite_score = score_func(y_true, y_pred)

    # Jackknifing to obtain a std-err estimate: re-evaluate score_func on
    # every leave-one-out subset of the sample indices.
    index = np.arange(y_true.shape[0])
    # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement.
    jack_idx = jackknife_resampling(index).astype(int)
    jack_scores = np.array(
        [score_func(y_true[idx], y_pred[idx]) for idx in jack_idx])
    jack_stderr = _compute_jackknife_stderr(jack_scores)

    return composite_score, jack_stderr
def jack_knife(p, q):
    """Return the smallest Wasserstein distance between row-wise paired
    jackknife resamples of p and q."""
    resampled_p = jackknife_resampling(p)
    resampled_q = jackknife_resampling(q)
    n_resamples = resampled_p.shape[0]
    # Compare the i-th leave-one-out resample of p with the i-th of q and
    # keep the minimum distance over all resample pairs.
    return min(
        wasserstein_distance(resampled_p[row, :], resampled_q[row, :])
        for row in range(n_resamples))


# TODO: add a few more of these (original note, translated from Serbian:
# "dodaj jos koju ne budi lenj" -- "add a few more, don't be lazy") ...
def jackknife(data, n):
    """Jackknife error estimate of the scaled variance var(data) / n**2:
    the root of the summed squared deviations of each leave-one-out
    estimate from the full-sample estimate."""
    scale = n * n
    full_estimate = np.var(data) / scale
    # Scaled variance of every leave-one-out resample.
    deviations = [
        np.var(sample) / scale - full_estimate
        for sample in jackknife_resampling(data)
    ]
    return np.sqrt(np.sum([dev * dev for dev in deviations]))
def jackknife(data, N, kb, T, method):
    """Jackknife error estimate of method(variance(data), N, kb, T):
    the root of the summed squared deviations of each leave-one-out
    value from the full-sample value."""
    full_value = method(variance(data), N, kb, T)
    # Apply the same estimator to every leave-one-out resample.
    resampled_values = [
        method(variance(sample), N, kb, T)
        for sample in jackknife_resampling(data)
    ]
    squared_devs = [(value - full_value) ** 2 for value in resampled_values]
    return np.sqrt(np.sum(squared_devs))
from astropy.stats import jackknife_stats
from statistics import harmonic_mean
import math

# Measurement sample; alternative test data sets kept below for reference.
#data = np.array([1.01, 0.99, 0.78, 1.12, 1.20, 0.86, 0.65, 0.56, 0.87, 0.63, 0.70, 1.24, 1.40])
data = np.array([
    50.69, 52.95, 52.47, 51.85, 51.55, 51.49, 52.08, 52.19, 51.49, 52.34,
    51.95, 51.73, 51.93, 51.3, 51.7, 51.28, 52.0, 52.07, 52.31, 52.28,
    51.42, 52.26, 51.64, 51.74, 50.34, 51.11, 52.55, 51.5, 53.22, 51.4
])
#data = np.array([1,2,3,4,5,6,7,8,9,0])

# Harmonic mean of the full sample.
harmonica = harmonic_mean(data)

# Leave-one-out resamples: one row per deleted observation.
resamples = jackknife_resampling(data)
#print(resamples)
print(resamples.shape)

# Harmonic mean of every jackknife resample.
hrList = [harmonic_mean(row) for row in resamples]
#print(" Hermonic mean list : ",hrList)

print(" Arithmetic mean of harmonic means ", np.mean(hrList))
hmean = np.mean(hrList)
def sensitivity_jacknife_fullP(root):
    """Fit bout frequency vs. pitch with a parabola per condition and plot
    the fit, its "sensitivity" coefficient, and the x/y intercepts, using
    jackknife resampling across experiment folders for error estimates.

    :param root: path to a directory whose sub-folders are condition
        folders named like '7dd_ctrl' (char 0 is read as dpf/age, chars 4+
        as the condition name -- see the slicing below); each condition
        folder holds one sub-folder per experiment with an IEI_data.h5 file.

    Side effects: prints reference fit coefficients and paired t-test /
    Tukey HSD statistics, shows a matplotlib figure. Returns None.
    """
    # get the name of all folders under root
    all_conditions = []
    folder_paths = []
    for folder in os.listdir(root):
        if folder[0] != '.':  # skip hidden entries such as .DS_Store
            folder_paths.append(root + '/' + folder)
            all_conditions.append(folder)

    # initialize results dataframes
    all_cond_angles = pd.DataFrame()  # all ori pitch including all conditions, for validation
    binned_angles = pd.DataFrame()  # binned mean of pitch for plotting as "raw data"
    coef_ori = pd.DataFrame()  # coef results calculated with all original pitch data
    fitted_y_ori = pd.DataFrame()  # fitted y using all original pitch data
    jackknifed_coef = pd.DataFrame()  # coef results calculated with jackknifed pitch data
    jackknifed_y = pd.DataFrame()  # fitted y using jackknifed pitch data

    # for each folder (condition)
    for condition_idx, folder in enumerate(folder_paths):
        # enter each condition folder (e.g. 7dd_ctrl)
        for subpath, subdir_list, subfile_list in os.walk(folder):
            # if folder is not empty
            if subdir_list:
                all_day_angles = pd.DataFrame()
                # loop through each sub-folder (experiment) under each condition
                for expNum, exp in enumerate(subdir_list):
                    # for each sub-folder, get the path
                    exp_path = os.path.join(subpath, exp)
                    df = pd.read_hdf(f"{exp_path}/IEI_data.h5",
                                     key='prop_bout_IEI2')
                    body_angles = df.loc[:, [
                        'propBoutIEI', 'propBoutIEI_pitch', 'propBoutIEItime'
                    ]]
                    # keep daytime bouts only; tag rows with the experiment
                    # number and the first 6 chars of the folder name as the
                    # date (presumably YYMMDD -- TODO confirm convention)
                    day_angles = day_night_split2(
                        body_angles,
                        'propBoutIEItime').assign(expNum=expNum,
                                                  date=exp[0:6])
                    day_angles.dropna(inplace=True)
                    all_day_angles = pd.concat([
                        all_day_angles, day_angles[[
                            'propBoutIEI', 'propBoutIEI_pitch', 'expNum',
                            'date'
                        ]]
                    ],
                                               ignore_index=True)
                    # enter next folder under this condition
                # bout frequency = reciprocal of the inter-event interval
                all_day_angles = all_day_angles.assign(
                    y_boutFreq=1 / all_day_angles['propBoutIEI'])
                # get binned mean of angles for plotting "raw" data
                binned_angles = pd.concat([
                    binned_angles,
                    distribution_binned_average(all_day_angles, BIN_WIDTH,
                                                all_conditions[condition_idx])
                ],
                                          ignore_index=True)
                # fit angles condition by condition and concatenate results
                coef, fitted_y = parabola_fit1(all_day_angles, X_RANGE_FULL)
                coef_ori = pd.concat([
                    coef_ori,
                    coef.assign(dpf=all_conditions[condition_idx][0],
                                condition=all_conditions[condition_idx][4:])
                ])
                fitted_y_ori = pd.concat([
                    fitted_y_ori,
                    fitted_y.assign(
                        dpf=all_conditions[condition_idx][0],
                        condition=all_conditions[condition_idx][4:])
                ])
                # jackknife for the index: each idx_group leaves one
                # experiment out (expNum still holds the last loop value,
                # so expNum + 1 == number of experiments)
                jackknife_idx = jackknife_resampling(
                    np.array(list(range(expNum + 1))))
                for excluded_exp, idx_group in enumerate(jackknife_idx):
                    # refit using only the experiments kept in idx_group
                    coef, fitted_y = parabola_fit1(
                        all_day_angles.loc[all_day_angles['expNum'].isin(
                            idx_group)], X_RANGE_FULL)
                    jackknifed_coef = pd.concat([
                        jackknifed_coef,
                        coef.assign(
                            dpf=all_conditions[condition_idx][0],
                            condition=all_conditions[condition_idx][4:],
                            excluded_exp=all_day_angles.loc[
                                all_day_angles['expNum'] == excluded_exp,
                                'date'].iloc[0])
                    ])
                    jackknifed_y = pd.concat([
                        jackknifed_y,
                        fitted_y.assign(
                            dpf=all_conditions[condition_idx][0],
                            condition=all_conditions[condition_idx][4:],
                            excluded_exp=all_day_angles.loc[
                                all_day_angles['expNum'] == excluded_exp,
                                'date'].iloc[0])
                    ])
                # get all angles at all conditions, for validation.
                # not needed for plotting
                all_cond_angles = pd.concat([
                    all_cond_angles,
                    all_day_angles.assign(
                        condition=all_conditions[condition_idx])
                ],
                                            ignore_index=True)
                # enter next condition

    # reference fit pooling every condition together
    coef_all_cond, fitted_y_all_cond = parabola_fit1(all_cond_angles,
                                                     X_RANGE_FULL)
    jackknifed_coef.columns = [
        'sensitivity', 'x_inter', 'y_inter', 'dpf', 'condition',
        'jackknife_excluded_sample'
    ]
    jackknifed_coef.sort_values(by=['condition', 'dpf'],
                                inplace=True,
                                ignore_index=True)
    jackknifed_coef['sensitivity'] = jackknifed_coef[
        'sensitivity'] * 1000  # unit: mHz/deg**2
    jackknifed_y.columns = [
        'y', 'x', 'dpf', 'condition', 'jackknife_excluded_sample'
    ]
    jackknifed_y.sort_values(by=['condition', 'dpf'],
                             inplace=True,
                             ignore_index=True)
    binned_angles.sort_values(by=['condition', 'dpf'],
                              inplace=True,
                              ignore_index=True)
    coef_ori.columns = [
        'sensitivity', 'x_inter', 'y_inter', 'dpf', 'condition'
    ]
    coef_ori.sort_values(by=['condition', 'dpf'],
                         inplace=True,
                         ignore_index=True)
    fitted_y_ori.columns = ['y', 'x', 'dpf', 'condition']
    fitted_y_ori.sort_values(by=['condition', 'dpf'],
                             inplace=True,
                             ignore_index=True)

    # %%
    print("Fitted coefs using ALL data (for reference):")
    print(coef_all_cond)

    # plot fitted parabola and sensitivity
    defaultPlotting()
    # Separate data by age.
    age_condition = set(jackknifed_y['dpf'].values)
    age_cond_num = len(age_condition)
    # initialize a multi-plot (4 rows x one column per age),
    # feel free to change the plot size
    f, axes = plt.subplots(nrows=4,
                           ncols=age_cond_num,
                           figsize=(2.5 * (age_cond_num), 12),
                           sharey='row')
    axes = axes.flatten()  # flatten if multidimenesional (multiple dpf)
    # setup color scheme for dot plots: one grey per jackknife sample
    flatui = ["#D0D0D0"] * (jackknifed_coef.groupby('condition').size()[0])
    defaultPlotting()

    # loop through different ages (dpf); row 1 = parabola fits,
    # row 2 = sensitivity, rows 3-4 = x / y intercepts
    for i, age in enumerate(age_condition):
        fitted = jackknifed_y.loc[jackknifed_y['dpf'] == age]
        # dots are plotted with binned average pitches
        binned = binned_angles.loc[binned_angles['dpf'] == age]
        g = sns.lineplot(x='x',
                         y='y',
                         hue='condition',
                         data=fitted,
                         ci="sd",
                         ax=axes[i])
        g = sns.scatterplot(x='propBoutIEI_pitch',
                            y='y_boutFreq',
                            hue='condition',
                            s=20,
                            data=binned,
                            alpha=0.3,
                            ax=axes[i],
                            linewidth=0)
        g.set_xticks(np.arange(-90, 135, 45))  # adjust ticks
        # NOTE(review): set_ylim's third positional arg is `emit`; the 30
        # here looks unintended -- confirm the intent (probably just
        # set_ylim(0, None))
        g.set_ylim(0, None, 30)

        # SENSITIVITY
        coef_plt = jackknifed_coef.loc[jackknifed_coef['dpf'] == age]
        # plot jackknifed paired data
        p = sns.pointplot(
            x='condition',
            y='sensitivity',
            hue='jackknife_excluded_sample',
            data=coef_plt,
            palette=sns.color_palette(flatui),
            scale=0.5,
            ax=axes[i + age_cond_num],
            # order=['Sibs','Tau','Lesion'],
        )
        # plot mean data
        p = sns.pointplot(
            x='condition',
            y='sensitivity',
            hue='condition',
            data=coef_plt,
            linewidth=0,
            alpha=0.9,
            ci=None,
            markers='d',
            ax=axes[i + age_cond_num],
            # order=['Sibs','Tau','Lesion'],
        )
        p.legend_.remove()
        # p.set_yticks(np.arange(0.1,0.52,0.04))
        # sns.despine(trim=True)

        # p values for sensitivity
        condition_s = set(coef_plt['condition'].values)
        condition_s = list(condition_s)

        # Paired T Test for 2 conditions
        if len(condition_s) == 2:
            # Separate data by condition; sort so rows pair up by the
            # excluded sample
            coef_cond1 = coef_plt.loc[coef_plt['condition'] ==
                                      condition_s[0]].sort_values(
                                          by='jackknife_excluded_sample')
            coef_cond2 = coef_plt.loc[coef_plt['condition'] ==
                                      condition_s[1]].sort_values(
                                          by='jackknife_excluded_sample')
            ttest_res, ttest_p = ttest_rel(coef_cond1['sensitivity'],
                                           coef_cond2['sensitivity'])
            print(
                f'* Age {age} Sensitivity: {condition_s[0]} v.s. {condition_s[1]} paired t-test p-value = {ttest_p}'
            )
        elif len(condition_s) > 2:
            # Tukey HSD multiple comparison for >2 conditions
            multi_comp = MultiComparison(
                coef_plt['sensitivity'],
                coef_plt['dpf'] + coef_plt['condition'])
            print(f'* Age {age} Sensitivity:')
            print(multi_comp.tukeyhsd().summary())
        else:
            pass

        # X INTERSECT
        sns.swarmplot(x='condition',
                      y='x_inter',
                      data=coef_plt,
                      ax=axes[i + age_cond_num * 2])
        # Y INTERSECT
        sns.swarmplot(x='condition',
                      y='y_inter',
                      data=coef_plt,
                      ax=axes[i + age_cond_num * 3])

    plt.show()
# fit angles condition by condition and concatenate results coef, fitted_y = parabola_fit1(all_day_angles, X_RANGE_FULL) coef_ori = pd.concat([ coef_ori, coef.assign(dpf=all_conditions[condition_idx][0], condition=all_conditions[condition_idx][4:]) ]) fitted_y_ori = pd.concat([ fitted_y_ori, fitted_y.assign(dpf=all_conditions[condition_idx][0], condition=all_conditions[condition_idx][4:]) ]) # jackknife for the index jackknife_idx = jackknife_resampling( np.array(list(range(expNum + 1)))) for excluded_exp, idx_group in enumerate(jackknife_idx): coef, fitted_y = parabola_fit1( all_day_angles.loc[all_day_angles['expNum'].isin( idx_group)], X_RANGE_FULL) jackknifed_coef = pd.concat([ jackknifed_coef, coef.assign(dpf=all_conditions[condition_idx][0], condition=all_conditions[condition_idx][4:], excluded_exp=all_day_angles.loc[ all_day_angles['expNum'] == excluded_exp, 'date'].iloc[0]) ]) jackknifed_y = pd.concat([ jackknifed_y, fitted_y.assign(
def bout_speed_aligned_jacknife(root):
    """Plot the jackknifed distribution of aligned swim speed per condition.

    :param root: directory whose sub-folders are condition folders (e.g.
        '7dd_ctrl'; char 0 is read as age/dpf, chars 4+ as the condition
        name), each containing one sub-folder per experiment with a
        bout_data.h5 file.

    Side effects: shows a seaborn lineplot (probability vs. swim speed).
    Returns None.

    NOTE(review): `bins` is not defined inside this function -- it must be
    provided at module level for np.histogram below; verify it is in scope.
    """
    all_conditions = []
    folder_paths = []
    # get the name of all folders under root
    for folder in os.listdir(root):
        if folder[0] != '.':  # skip hidden entries
            folder_paths.append(root + '/' + folder)
            all_conditions.append(folder)

    jack_y_all = pd.DataFrame()
    ang_std_all = pd.DataFrame()

    # go through each condition folders under the root
    for condition_idx, folder in enumerate(folder_paths):
        # enter each condition folder (e.g. 7dd_ctrl)
        for subpath, subdir_list, subfile_list in os.walk(folder):
            # if folder is not empty
            if subdir_list:
                all_speed = pd.DataFrame()
                ang_std = []
                # loop through each sub-folder (experiment) under each condition
                for expNum, exp in enumerate(subdir_list):
                    # for each sub-folder, get the path
                    exp_path = os.path.join(subpath, exp)
                    df = pd.read_hdf(f"{exp_path}/bout_data.h5",
                                     key='prop_bout_aligned')
                    # get the aligned speed column; one row per experiment
                    # after the transpose
                    swim_speed = df.loc[:, ['propBoutAligned_speed']].rename(
                        columns={
                            'propBoutAligned_speed': f'exp{expNum}'
                        }).transpose()
                    all_speed = pd.concat([all_speed, swim_speed])
                # jackknife for the index (expNum + 1 == number of experiments)
                jackknife_idx = jackknife_resampling(
                    np.array(list(range(expNum + 1))))
                # get the distribution of every jackknifed sample
                jack_y = pd.concat([
                    pd.DataFrame(
                        np.histogram(
                            all_speed.iloc[idx_group].to_numpy().flatten(),
                            bins=bins,
                            density=True)) for idx_group in jackknife_idx
                ],
                                   axis=1).transpose()
                jack_y_all = pd.concat([
                    jack_y_all,
                    jack_y.assign(age=all_conditions[condition_idx][0],
                                  condition=all_conditions[condition_idx][4:])
                ],
                                       axis=0,
                                       ignore_index=True)
                # # get the std of every jackknifed sample
                # for idx_group in jackknife_idx:
                #     ang_std.append(np.nanstd(all_speed.iloc[idx_group].to_numpy().flatten()))
                # ang_std = pd.DataFrame(ang_std)
                # ang_std_all = pd.concat([ang_std_all, ang_std.assign(age=all_conditions[condition_idx][0], condition=all_conditions[condition_idx][4:])], axis=0, ignore_index=True)

    jack_y_all.columns = ['Probability', 'swim_speed', 'dpf', 'condition']
    jack_y_all.sort_values(by=['dpf', 'condition'], inplace=True)

    # %%
    defaultPlotting()
    g = sns.lineplot(x='swim_speed',
                     y='Probability',
                     hue='condition',
                     style='dpf',
                     data=jack_y_all,
                     ci='sd',
                     err_style='band')
    plt.show()
def IEI_pitch_mean_jacknife(root):
    """Plot jackknifed inter-event-interval (IEI) pitch distributions and
    their standard deviations per condition.

    :param root: directory whose sub-folders are condition folders (e.g.
        '7dd_ctrl'; char 0 is read as age/dpf, chars 4+ as the condition
        name), each containing one sub-folder per experiment with an
        IEI_data.h5 file.

    Side effects: shows a 2-row figure per age (distribution on top,
    std(posture) below) and prints paired t-test / Tukey HSD statistics.
    Returns None.
    """
    all_conditions = []
    folder_paths = []
    # get the name of all folders under root
    for folder in os.listdir(root):
        if folder[0] != '.':  # skip hidden entries
            folder_paths.append(root+'/'+folder)
            all_conditions.append(folder)

    # histogram bin edges for pitch angle, -90..90 deg in 5-deg steps
    bins = list(range(-90,95,5))
    jack_y_all = pd.DataFrame()
    ang_std_all = pd.DataFrame()

    # go through each condition folders under the root
    for condition_idx, folder in enumerate(folder_paths):
        # enter each condition folder (e.g. 7dd_ctrl)
        for subpath, subdir_list, subfile_list in os.walk(folder):
            # if folder is not empty
            if subdir_list:
                all_angles = pd.DataFrame()
                exp_date_match = pd.DataFrame()
                ang_std = []
                # loop through each sub-folder (experiment) under each condition
                for expNum, exp in enumerate(subdir_list):
                    # for each sub-folder, get the path
                    exp_path = os.path.join(subpath, exp)
                    df = pd.read_hdf(f"{exp_path}/IEI_data.h5", key='prop_bout_IEI2')
                    # get pitch; one row per experiment after the transpose
                    body_angles = df.loc[:,['propBoutIEI_pitch']].rename(columns={'propBoutIEI_pitch':f'exp{expNum}'}).transpose()
                    all_angles = pd.concat([all_angles, body_angles])
                    # remember which date (first 6 chars of the folder name)
                    # belongs to which experiment number
                    exp_date_match = pd.concat([exp_date_match, pd.DataFrame(
                        data= {'expNum':expNum,'date':[exp[0:6]]}
                    )],ignore_index=True)
                # jackknife for the index (expNum + 1 == number of experiments)
                jackknife_idx = jackknife_resampling(np.array(list(range(expNum+1))))
                # get the distribution of every jackknifed sample for the current condition
                jack_y = pd.concat([pd.DataFrame(
                    np.histogram(all_angles.iloc[idx_group].to_numpy().flatten(), bins=bins, density=True)
                ) for idx_group in jackknife_idx], axis=1).transpose()
                # combine conditions
                jack_y_all = pd.concat([jack_y_all, jack_y.assign(age=all_conditions[condition_idx][0], condition=all_conditions[condition_idx][4:])], axis=0, ignore_index=True)
                # get the std of every jackknifed sample
                for excluded_exp, idx_group in enumerate(jackknife_idx):
                    ang_std.append(np.nanstd(all_angles.iloc[idx_group].to_numpy().flatten()))
                # label each std with the date of the experiment excluded
                # from its resample
                ang_std = pd.DataFrame(ang_std).assign(excluded_exp=exp_date_match['date'])
                ang_std_all = pd.concat([ang_std_all, ang_std.assign(age=all_conditions[condition_idx][0], condition=all_conditions[condition_idx][4:])], axis=0, ignore_index=True)

    jack_y_all.columns = ['Probability','Posture (deg)','dpf','condition']
    jack_y_all.sort_values(by=['condition'],inplace=True)
    ang_std_all.columns = ['std(posture)','excluded_exp','dpf','condition']
    ang_std_all.sort_values(by=['condition'],inplace=True)

    # %%
    # Stats
    # # For multiple comparison
    # multi_comp = MultiComparison(ang_std_all['std(posture)'], ang_std_all['dpf']+ang_std_all['condition'])
    # print(multi_comp.tukeyhsd().summary())

    # %%
    # Plot posture distribution and its standard deviation
    defaultPlotting()
    # Separate data by age.
    age_condition = set(jack_y_all['dpf'].values)
    age_cond_num = len(age_condition)
    # initialize a multi-plot, feel free to change the plot size
    f, axes = plt.subplots(nrows=2, ncols=age_cond_num, figsize=(2.5*(age_cond_num), 10), sharey='row')
    axes = axes.flatten()  # flatten if multidimenesional (multiple dpf)
    # setup color scheme for dot plots: one grey per jackknife sample
    flatui = ["#D0D0D0"] * (ang_std_all.groupby('condition').size()[0])
    defaultPlotting()

    # loop through different ages (dpf): distribution in the first row,
    # std(posture) in the second
    for i, age in enumerate(age_condition):
        fitted = jack_y_all.loc[jack_y_all['dpf']==age]
        g = sns.lineplot(x='Posture (deg)',y='Probability', hue='condition', style='dpf', data=fitted, ci='sd', err_style='band', ax=axes[i])
        # g.set_yticks(np.arange(x,y,step))  # adjust y ticks
        g.set_xticks(np.arange(-90,135,45))  # adjust x ticks

        # plot std
        std_plt = ang_std_all.loc[ang_std_all['dpf']==age]
        # plot jackknifed paired data
        p = sns.pointplot(x='condition', y='std(posture)', hue='excluded_exp',data=std_plt,
                          palette=sns.color_palette(flatui), scale=0.5,
                          ax=axes[i+age_cond_num],
                          # order=['Sibs','Tau','Lesion'],
        )
        # plot mean data
        p = sns.pointplot(x='condition', y='std(posture)',hue='condition',data=std_plt,
                          linewidth=0,
                          alpha=0.9,
                          ci=None,
                          markers='d',
                          ax=axes[i+age_cond_num],
                          # order=['Sibs','Tau','Lesion'],
        )
        p.legend_.remove()
        # p.set_yticks(np.arange(0.1,0.52,0.04))
        sns.despine(trim=True)

        condition_s = set(std_plt['condition'].values)
        condition_s = list(condition_s)

        if len(condition_s) == 2:
            # Paired T Test for 2 conditions
            # Separate data by condition; sort so rows pair up by the
            # excluded experiment
            std_cond1 = std_plt.loc[std_plt['condition']==condition_s[0]].sort_values(by='excluded_exp')
            std_cond2 = std_plt.loc[std_plt['condition']==condition_s[1]].sort_values(by='excluded_exp')
            ttest_res, ttest_p = ttest_rel(std_cond1['std(posture)'],std_cond2['std(posture)'])
            print(f'* Age {age}: {condition_s[0]} v.s. {condition_s[1]} paired t-test p-value = {ttest_p}')
        elif len(condition_s) > 2:
            # multiple comparison for more than 2 conditions
            print(f'* Age {age}:' )
            multi_comp = MultiComparison(ang_std_all['std(posture)'], ang_std_all['dpf']+ang_std_all['condition'])
            print(multi_comp.tukeyhsd().summary())
        else:
            pass

    plt.show()