diff_data.shape plt_dat = np.average(diff_data[:, :, :, :, tidx], 4) plt_dat = np.average(plt_dat[:, :, :, fidx], 3) plt_dat = plt_dat[:, :, cidx] plt_dat = pd.DataFrame(data=np.swapaxes(plt_dat, 1, 0), columns=conditions) plt_dat = pd.melt(plt_dat, var_name='Condition', value_name='Power') pt.half_violinplot(x='Condition', y="Power", data=plt_dat, inner=None, jitter=True, color=".7", lwidth=0, width=0.6, offset=0.17, cut=1, ax=axes[-1], linewidth=1, alpha=0.6, palette=[param['palette'][0], param['palette'][3]], zorder=19) sns.stripplot(x='Condition', y="Power", data=plt_dat, jitter=0.08, ax=axes[-1], linewidth=1, alpha=0.6, palette=[param['palette'][0], param['palette'][3]],
def boxplot_freqs(foi, chan, time, gavg, data_all, ax, pal,
                  conditions=('CS-1', 'CS-2', 'CS-E', 'CS+')):
    """Raincloud-style plot (half-violin + strip + box) of average power.

    Averages ``data_all`` over the frequency band ``foi``, the time window
    ``time`` and the single channel ``chan``, then draws one distribution per
    condition (one point per subject) on ``ax``.

    :param foi: (f_low, f_high) frequency band of interest, matched exactly
        against ``gavg[...].freqs``.
    :param chan: channel name to plot.
    :param time: (t_start, t_end) time window of interest, matched to the
        nearest samples of ``gavg[...].times``.
    :param gavg: mapping of condition name -> object exposing ``.freqs``,
        ``.times`` and ``.ch_names`` (presumably an MNE AverageTFR — TODO
        confirm); only used for index lookups and assumed identical across
        conditions.
    :param data_all: array indexed as [subject, condition, channel, freq,
        time] — assumed from the slicing below; TODO confirm against caller.
    :param ax: matplotlib axes to draw on.
    :param pal: colour palette forwarded to seaborn/ptitprince.
    :param conditions: condition labels in the order of ``data_all``'s
        condition axis; defaults to the original hard-coded labels, so
        existing callers are unaffected.
    :return: the axes ``ax``.
    """
    # Index lookups come from the first condition; assumes all conditions
    # share the same freqs/times/channel layout.
    c = conditions[0]
    fidx = np.arange(np.where(gavg[c].freqs == foi[0])[0],
                     np.where(gavg[c].freqs == foi[1])[0])
    times = gavg[c].times
    tidx = np.arange(np.argmin(np.abs(times - time[0])),
                     np.argmin(np.abs(times - time[1])))
    cidx = gavg[c].ch_names.index(chan)

    # Select channel, band and window, then average out time (axis 3)
    # and frequency (axis 2).  (A leftover debug `plt_dat.shape` no-op
    # statement was removed here.)
    plt_dat = data_all[:, :, cidx, :, :]
    plt_dat = plt_dat[:, :, fidx, :]
    plt_dat = plt_dat[:, :, :, tidx]
    plt_dat = np.average(plt_dat, 3)
    plt_dat = np.average(plt_dat, 2)

    # Long format: one row per (subject, condition).
    plt_dat = pd.DataFrame(data=np.swapaxes(plt_dat, 1, 0),
                           columns=list(conditions))
    plt_dat = pd.melt(plt_dat, var_name='Condition', value_name='Power')

    pt.half_violinplot(x='Condition', y="Power", data=plt_dat, inner=None,
                       jitter=True, color=".7", lwidth=0, width=0.6,
                       offset=0.17, cut=1, ax=ax, linewidth=1, alpha=0.6,
                       palette=pal, zorder=19)
    sns.stripplot(x='Condition', y="Power", data=plt_dat, jitter=0.08, ax=ax,
                  linewidth=1, alpha=0.6, palette=pal, zorder=1)
    sns.boxplot(x='Condition', y="Power", data=plt_dat, palette=pal,
                whis=np.inf, linewidth=1, ax=ax, width=0.1,
                boxprops={"zorder": 10, 'alpha': 0.5},
                whiskerprops={'zorder': 10, 'alpha': 1},
                medianprops={'zorder': 11, 'alpha': 0.5})
    return ax
def genera_raincloud():
    """Generate the example raincloud figures step by step and save them.

    Builds up a raincloud plot in stages (clouds only, + rain, + jittered
    rain, + boxplot, then the all-in-one ``pt.RainCloud``) and saves each
    stage as a PNG under ``img/``.

    Relies on a module-level DataFrame ``df`` with 'variety' and
    'sepal.length' columns — presumably the iris dataset; TODO confirm.
    Returns nothing; saves five figures.
    """
    # Plotting the clouds
    f, ax = plt.subplots(figsize=(7, 5))
    dy = "variety"
    dx = "sepal.length"
    ort = "h"
    pal = sns.color_palette(n_colors=1)
    ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                            scale="area", width=.6, inner=None, orient=ort)
    plt.title("Raincloud with Clouds")
    plt.savefig("img/Raincloud_Clouds.png")
    plt.close()

    # Adding the rain
    f, ax = plt.subplots(figsize=(7, 5))
    ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                            scale="area", width=.6, inner=None, orient=ort)
    ax = sns.stripplot(x=dx, y=dy, data=df, palette=pal, edgecolor="white",
                       size=3, jitter=0, zorder=0, orient=ort)
    plt.title("Raincloud with Clouds and Rain")
    plt.savefig("img/Raincloud_Clouds_Rain.png")
    plt.close()

    # Adding jitter to the rain
    f, ax = plt.subplots(figsize=(7, 5))
    ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                            scale="area", width=.6, inner=None, orient=ort)
    ax = sns.stripplot(x=dx, y=dy, data=df, palette=pal, edgecolor="white",
                       size=3, jitter=1, zorder=0, orient=ort)
    plt.title("Raincloud with Clouds and Jitter rain")
    plt.savefig("img/Raincloud_Clouds_Rain_Jitter.png")
    # Fix: this figure was never closed, leaking it into the next one.
    plt.close()

    # Adding the boxplot with quartiles
    f, ax = plt.subplots(figsize=(7, 5))
    ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.2, cut=0.,
                            scale="area", width=.6, inner=None, orient=ort)
    ax = sns.stripplot(x=dx, y=dy, data=df, palette=pal, edgecolor="white",
                       size=3, jitter=1, zorder=0, orient=ort)
    # Fix: the original issued this identical sns.boxplot call twice,
    # drawing the same boxplot on top of itself; draw it once.
    ax = sns.boxplot(x=dx, y=dy, data=df, color="black", width=.15,
                     zorder=10, showcaps=True,
                     boxprops={'facecolor': 'none', "zorder": 10},
                     showfliers=True,
                     whiskerprops={'linewidth': 2, "zorder": 10},
                     saturation=1, orient=ort)
    plt.title("Raincloud with Boxplot")
    plt.savefig("img/Raincloud_Boxplot.png")
    plt.close()

    # The all-in-one RainCloud call, with the rain shifted below the cloud.
    dx = "variety"
    dy = "sepal.length"
    ort = "h"
    pal = "Set2"
    sigma = .2
    f, ax = plt.subplots(figsize=(7, 5))
    ax = pt.RainCloud(x=dx, y=dy, data=df, palette=pal, bw=sigma,
                      width_viol=.6, ax=ax, orient=ort, move=.2)
    plt.title("Raincloud with Boxplot and Shifted Rain")
    plt.savefig("img/Raincloud_Boxplot_Shifted_Rain.png")
    plt.close()
def plot_violins(df):
    """Raincloud plot of per-model absolute error; saves figures/figure_4.pdf.

    Expects ``df`` with columns ``type`` (model variant, e.g. 'baseline',
    'timestamp') and ``error`` (log10 of the absolute error ratio) —
    assumed from the column accesses below; TODO confirm against caller.
    Also prints the median absolute error (de-logged) for the 'baseline'
    and 'timestamp' variants.
    """
    plt.figure(figsize=(1.65 * 2, 1.4))
    dx = 'type'
    dy = 'error'
    pal = "tab10"
    ort = 'v'
    # Clip errors above 3x so the violins stay readable.
    df = df[df.error < np.log10(3)]
    ax = pt.half_violinplot(x=dx, y=dy, data=df, palette=pal, bw=.1, cut=0.,
                            scale="width", width=1., inner=None, orient=ort,
                            linewidth=0.8, offset=0.2)
    # Subsample the rain to 2000 points to keep the PDF small.
    ax = sns.stripplot(x=dx, y=dy, data=df.sample(2000), palette=pal,
                       edgecolor="white", size=1, jitter=1, zorder=1,
                       orient=ort, alpha=0.5)
    ax = sns.boxplot(x=dx, y=dy, data=df, color="black", width=.2, zorder=10,
                     showcaps=True, boxprops={
                         'facecolor': 'none', "zorder": 10
                     }, showfliers=True, whiskerprops={
                         'linewidth': 1, "zorder": 10
                     }, saturation=1, orient=ort, fliersize=0, linewidth=1)
    # y axis is log10(error ratio); label ticks with the de-logged ratios.
    yticks = [1 / 3, 1 / 2, 1 / 1.5, 1 / 1.2, 1, 1.2, 1.5, 2, 3]
    ax.set_yticks(np.log10(yticks))
    ax.set_yticklabels([r"{:.2f} $\times$".format(y) for y in yticks])
    plt.ylim(-0.03, np.log10(3))
    plt.ylabel("Absolute Error")
    plt.xlim(-0.8, 3.3)
    # NOTE(review): 4 tick positions but only 2 labels — matplotlib >= 3.3
    # raises ValueError on this mismatch.  It looks like two labels were
    # lost from this list; recover them from the figure in the paper.
    plt.xticks([0, 1, 2, 3], [
        "POSIX Baseline\nMedian = $1.173\\times$",
        "POSIX + start time\nMedian = $1.117\\times$"
    ],
               rotation=30)
    plt.xlabel("")
    # De-log (10**x) to report medians as plain ratios.
    print("Baseline median absolute error: {}".format(
        10**df[df.type == 'baseline'].error.median()))
    print("Timestamps median absolute error: {}".format(
        10**df[df.type == 'timestamp'].error.median()))
    plt.savefig("figures/figure_4.pdf", dpi=600, bbox_inches='tight')
def raincloud_w_fail(sel_dict_path, lesion_dict_path,
                     plot_type='classes', coi_measure='c_informed',
                     top_layers='all',
                     selected_units=False,
                     plots_dir='simple_rain_plots',
                     plot_fails=False,
                     plot_class_change=False,
                     normed_acts=False,
                     layer_act_dist=False,
                     verbose=False, test_run=False,
                     ):
    """
    Visualise units with a raincloud plot.
    Has distributions (cloud), individual activations (raindrops), boxplot to give median and interquartile range.
    Also has plot of zero activations, scaled by class size.
    Will show items that are affected by lesioning in different colours.

    I only have lesion data for [conv2d, dense] layers
    I have GHA and sel data from [conv2d, activation, max_pooling2d, dense] layers
    so for each lesioned layer [conv2d, dense] I will use the following activation layer to take GHA and sel data from.
    Join these into groups using the activation number as the layer numbers.
    e.g., layer 1 (first conv layer) = conv2d_1 & activation_1. layer 7 (first fc layer) = dense1 & activation 7)

    :param sel_dict_path: path to selectivity dict
    :param lesion_dict_path: path to lesion dict
    :param plot_type: all classes or OneVsAll.  if n_cats > 10, should automatically revert to oneVsAll.
    :param coi_measure: measure to use when choosing which class should be the coi.
        Either the best performing sel measures (c_informed, c_ROC) or max class drop from lesioning.
    :param top_layers: if int, it will just do the top n layers (excluding output).  If not int, will do all layers.
    :param selected_units: default is to test all units on all layers.  But If I just want individual units,
        I should be able to input a dict with layer names as keys and a list for each unit on that layer.
        e.g., to just get unit 216 from 'fc_1' use selected_units={'fc_1': [216]}.
    :param plots_dir: where to save plots
    :param plot_fails: If False, just plots correct items, if true, plots items that failed after lesioning in RED
    :param plot_class_change: if True, plots proportion of items correct per class.
    :param normed_acts: if False use actual activation values, if True, normalize activations 0-1
    :param layer_act_dist: plot the distribution of all activations on a given layer.
        This should already have been done in GHA
    :param verbose: how much to print to screen
    :param test_run: if True, just plot two units from two layers, if False, plot all (or selected units)

    Returns nothing, just saves the plots.
    Side effects: calls os.chdir() into the plot directory and writes PNG/CSV files there.
    """

    print("\n**** running visualise_units()****")

    if not selected_units:
        print(f"selected_units?: {selected_units}\n"
              "running ALL layers and units")
    else:
        print(focussed_dict_print(selected_units, 'selected_units'))
        # if type(selected_units) is dict:
        #     print("dict found")

    # # lesion dict
    lesion_dict = load_dict(lesion_dict_path)
    focussed_dict_print(lesion_dict, 'lesion_dict')

    # # get key_lesion_layers_list
    lesion_info = lesion_dict['lesion_info']
    lesion_path = lesion_info['lesion_path']
    lesion_highlighs = lesion_info["lesion_highlights"]
    key_lesion_layers_list = list(lesion_highlighs.keys())

    # # remove unnecesary items from key layers list
    if 'highlights' in key_lesion_layers_list:
        key_lesion_layers_list.remove('highlights')
    # if 'output' in key_lesion_layers_list:
    #     key_lesion_layers_list.remove('output')
    # if 'Output' in key_lesion_layers_list:
    #     key_lesion_layers_list.remove('Output')

    # # remove output layers from key layers list
    # ("utput" matches both 'output' and 'Output'; truncate at the first one)
    if any("utput" in s for s in key_lesion_layers_list):
        output_layers = [s for s in key_lesion_layers_list if "utput" in s]
        output_idx = []
        for out_layer in output_layers:
            output_idx.append(key_lesion_layers_list.index(out_layer))
        min_out_idx = min(output_idx)
        key_lesion_layers_list = key_lesion_layers_list[:min_out_idx]

    # # remove output layers from key layers list
    if any("predictions" in s for s in key_lesion_layers_list):
        output_layers = [s for s in key_lesion_layers_list if "predictions" in s]
        output_idx = []
        for out_layer in output_layers:
            output_idx.append(key_lesion_layers_list.index(out_layer))
        min_out_idx = min(output_idx)
        key_lesion_layers_list = key_lesion_layers_list[:min_out_idx]

    class_labels = list(lesion_dict['data_info']['cat_names'].values())

    # # sel_dict
    sel_dict = load_dict(sel_dict_path)
    if key_lesion_layers_list[0] in sel_dict['sel_info']:
        print('\nfound old sel dict layout')
        key_gha_sel_layers_list = list(sel_dict['sel_info'].keys())
        old_sel_dict = True
        # NOTE(review): in this (old-layout) branch `sel_info` is never
        # assigned, yet it is read later (unit membership check, new-layout
        # lookups) — looks like a latent NameError; TODO confirm.
        # sel_info = sel_dict['sel_info']
        # short_sel_measures_list = list(sel_info[key_lesion_layers_list[0]][0]['sel'].keys())
        # csb_list = list(sel_info[key_lesion_layers_list[0]][0]['class_sel_basics'].keys())
        # sel_measures_list = short_sel_measures_list + csb_list
    else:
        print('\nfound NEW sel dict layout')
        old_sel_dict = False
        sel_info = load_dict(sel_dict['sel_info']['sel_per_unit_pickle_name'])
        # sel_measures_list = list(sel_info[key_lesion_layers_list[0]][0].keys())
        key_gha_sel_layers_list = list(sel_info.keys())
        # print(sel_info.keys())

    # # get key_gha_sel_layers_list
    # # # remove unnecesary items from key layers list
    # if 'sel_analysis_info' in key_gha_sel_layers_list:
    #     key_gha_sel_layers_list.remove('sel_analysis_info')
    # if 'output' in key_gha_sel_layers_list:
    #     output_idx = key_gha_sel_layers_list.index('output')
    #     key_gha_sel_layers_list = key_gha_sel_layers_list[:output_idx]
    # if 'Output' in key_gha_sel_layers_list:
    #     output_idx = key_gha_sel_layers_list.index('Output')
    #     key_gha_sel_layers_list = key_gha_sel_layers_list[:output_idx]

    # # remove output layers from key layers list
    if any("utput" in s for s in key_gha_sel_layers_list):
        output_layers = [s for s in key_gha_sel_layers_list if "utput" in s]
        output_idx = []
        for out_layer in output_layers:
            output_idx.append(key_gha_sel_layers_list.index(out_layer))
        min_out_idx = min(output_idx)
        key_gha_sel_layers_list = key_gha_sel_layers_list[:min_out_idx]
        # key_layers_df = key_layers_df.loc[~key_layers_df['name'].isin(output_layers)]

    # # remove output layers from key layers list
    # NOTE(review): this repeats the "predictions" pruning on
    # key_lesion_layers_list (already done above) rather than on
    # key_gha_sel_layers_list — possibly a copy/paste slip; TODO confirm.
    if any("predictions" in s for s in key_lesion_layers_list):
        output_layers = [s for s in key_lesion_layers_list if "predictions" in s]
        output_idx = []
        for out_layer in output_layers:
            output_idx.append(key_lesion_layers_list.index(out_layer))
        min_out_idx = min(output_idx)
        key_lesion_layers_list = key_lesion_layers_list[:min_out_idx]

    # # put together lists of 1. sel_gha_layers, 2. key_lesion_layers_list.
    n_activation_layers = sum("activation" in layers for layers in key_gha_sel_layers_list)
    n_lesion_layers = len(key_lesion_layers_list)

    if n_activation_layers == n_lesion_layers:
        # # for models where activation and conv (or dense) are separate layers
        n_layers = n_activation_layers
        activation_layers = [layers for layers in key_gha_sel_layers_list if "activation" in layers]
        # pair them from the back so the lists align even if the fronts differ
        link_layers_dict = dict(zip(reversed(activation_layers), reversed(key_lesion_layers_list)))
    elif n_activation_layers == 0:
        print("\nno separate activation layers found - use key_lesion_layers_list")
        n_layers = len(key_lesion_layers_list)
        link_layers_dict = dict(zip(reversed(key_lesion_layers_list), reversed(key_lesion_layers_list)))
    else:
        print(f"n_activation_layers: {n_activation_layers}\n{key_gha_sel_layers_list}")
        # NOTE(review): missing f-prefix below — the braces print literally.
        print("n_lesion_layers: {n_lesion_layers}\n{key_lesion_layers_list}")
        raise TypeError('should be same number of activation layers and lesioned layers')

    if verbose is True:
        focussed_dict_print(link_layers_dict, 'link_layers_dict')

    # # # get info
    exp_cond_path = sel_dict['topic_info']['exp_cond_path']
    output_filename = sel_dict['topic_info']['output_filename']

    # hid acts hdf
    hdf_name = f'{output_filename}_gha.h5'

    # # load data
    # # check for training data
    use_dataset = sel_dict['GHA_info']['use_dataset']

    n_cats = sel_dict['data_info']["n_cats"]

    if use_dataset in sel_dict['data_info']:
        # n_items = sel_dict["data_info"][use_dataset]["n_items"]
        items_per_cat = sel_dict["data_info"][use_dataset]["items_per_cat"]
    else:
        # n_items = sel_dict["data_info"]["n_items"]
        items_per_cat = sel_dict["data_info"]["items_per_cat"]
    # a single int means balanced classes: expand to a per-class dict
    if type(items_per_cat) is int:
        items_per_cat = dict(zip(list(range(n_cats)), [items_per_cat] * n_cats))

    if plot_type != 'OneVsAll':
        if n_cats > 20:
            plot_type = 'OneVsAll'
            print("\n\n\nWARNING! There are lots of classes, it might make a messy plot"
                  "Switching to OneVsAll\n")

    # NOTE(review): compares against the *string* 'False', not the bool —
    # presumably the dict stores it as text; TODO confirm.
    if sel_dict['GHA_info']['gha_incorrect'] == 'False':
        # # only gha for correct items
        # n_items = sel_dict['GHA_info']['scores_dict']['n_correct']
        items_per_cat = sel_dict['GHA_info']['scores_dict']['corr_per_cat_dict']

    # # load hid acts dict called hid_acts.pickle
    """ Hid_acts dict has numbers as the keys for each layer.
    Some layers (will be missing) as acts only recorded from some layers (e.g., [17, 19, 20, 22, 25, 26, 29, 30])
    hid_acts_dict.keys(): dict_keys([0, 1, 3, 5, 6, 8, 9, 11, 13, 14, 16, 17, 19, 20, 22, 25, 26, 29, 30])
    hid_acts_dict[0].keys(): dict_keys(['layer_name', 'layer_class', 'layer_shape', '2d_acts', 'converted_to_2d'])
    In each layer there is ['layer_name', 'layer_class', 'layer_shape', '2d_acts']
    For 4d layers (conv, pool) there is also, key, value 'converted_to_2d': True
    """
    # # check if I have saved the location to this file
    hid_acts_pickle_name = sel_dict["GHA_info"]["hid_act_files"]['2d']
    if 'gha_path' in sel_dict['GHA_info']:
        gha_path = sel_dict['GHA_info']['gha_path']
        hid_acts_path = os.path.join(gha_path, hid_acts_pickle_name)
    else:
        # reconstruct the GHA folder name from its naming convention
        hid_act_items = 'all'
        if not sel_dict['GHA_info']['gha_incorrect']:
            hid_act_items = 'correct'
        gha_folder = f'{hid_act_items}_{use_dataset}_gha'
        hid_acts_path = os.path.join(exp_cond_path, gha_folder, hid_acts_pickle_name)
    with open(hid_acts_path, 'rb') as pkl:
        hid_acts_dict = pickle.load(pkl)
    print("\nopened hid_acts.pickle")

    # # # visualizing distribution of activations
    # if layer_act_dist:
    #     print("\nPlotting the distributions of activations for each layer")
    #     for k, v in hid_acts_dict.items():
    #         print("\nPlotting distribution of layer acts")
    #         layer_act_dist_dir = 'layer_act_dist'
    #         print(hid_acts_dict[k]['layer_name'])
    #         hid_acts = hid_acts_dict[k]['2d_acts']
    #         print(np.shape(hid_acts))
    #         sns.distplot(np.ravel(hid_acts))
    #         plt.title(str(hid_acts_dict[k]['layer_name']))
    #         dist_plot_name = "{}_{}_layer_act_distplot.png".format(output_filename, hid_acts_dict[k]['layer_name'])
    #         plt.savefig(os.path.join(plots_dir, layer_act_dist_dir, dist_plot_name))
    #         # plt.show()
    #         plt.close()

    # # dict to get the hid_acts_dict key for each layer based on its name
    get_hid_acts_number_dict = dict()
    for key, value in hid_acts_dict.items():
        hid_acts_layer_name = value['layer_name']
        hid_acts_layer_number = key
        get_hid_acts_number_dict[hid_acts_layer_name] = hid_acts_layer_number

    # # where to save files
    save_plots_name = plots_dir
    # NOTE(review): `is` with a str literal relies on CPython interning;
    # should be == (same for the other `is "OneVsAll"` checks below).
    if plot_type is "OneVsAll":
        save_plots_name = f'{plots_dir}/{coi_measure}'
    save_plots_dir = lesion_dict['GHA_info']['gha_path']
    save_plots_path = os.path.join(save_plots_dir, save_plots_name)
    if test_run:
        save_plots_path = os.path.join(save_plots_path, 'test')
    if not os.path.exists(save_plots_path):
        os.makedirs(save_plots_path)
    # NOTE: changes the process working directory for all later relative saves
    os.chdir(save_plots_path)
    print(f"\ncurrent wd: {os.getcwd()}")

    if layer_act_dist:
        layer_act_dist_path = os.path.join(save_plots_path, 'layer_act_dist')
        if not os.path.exists(layer_act_dist_path):
            os.makedirs(layer_act_dist_path)

    print("\n\n**********************"
          "\nlooping through layers"
          "\n**********************\n")

    for layer_index, (gha_layer_name, lesion_layer_name) in enumerate(link_layers_dict.items()):
        # test_run: only the first three layers
        if test_run:
            if layer_index > 2:
                continue

        if type(top_layers) is int:
            if top_layers < n_activation_layers:
                if layer_index > top_layers:
                    continue

        # print(f"\nwhich units?: {selected_units}")
        # if selected_units != 'all':
        if selected_units is not False:
            if gha_layer_name not in selected_units:
                print(f"\nselected_units only, skipping layer {gha_layer_name}")
                continue
            else:
                print(f"\nselected_units only, from {gha_layer_name}")
                # print(f"\t{gha_layer_name} in {list(selected_units.keys())}")
                this_layer_units = selected_units[gha_layer_name]
                print(f"\trunning units: {this_layer_units}")

        gha_layer_number = get_hid_acts_number_dict[gha_layer_name]
        layer_dict = hid_acts_dict[gha_layer_number]

        if gha_layer_name != layer_dict['layer_name']:
            raise TypeError("gha_layer_name (from link_layers_dict) and layer_dict['layer_name'] should match! ")

        # hid_acts_array = layer_dict['2d_acts']
        # hid_acts_df = pd.DataFrame(hid_acts_array, dtype=float)
        # load this layer's 2d activations (items x units) from the gha hdf5
        with h5py.File(hdf_name, 'r') as gha_data:
            hid_acts_array = gha_data['hid_acts_2d'][gha_layer_name]
            hid_acts_df = pd.DataFrame(hid_acts_array)

        # # visualizing distribution of activations
        if layer_act_dist:
            hid_acts = layer_dict['2d_acts']
            print(f"\nPlotting distribution of activations {np.shape(hid_acts)}")
            sns.distplot(np.ravel(hid_acts))
            plt.title(f"{str(layer_dict['layer_name'])} activation distribution")
            dist_plot_name = "{}_{}_layer_act_distplot.png".format(output_filename, layer_dict['layer_name'])
            plt.savefig(os.path.join(layer_act_dist_path, dist_plot_name))
            if test_run:
                plt.show()
            plt.close()

        # # load item change details
        """# # four possible states
            full model      after_lesion    code
        1.  1 (correct)     0 (wrong)       -1
        2.  0 (wrong)       0 (wrong)       0
        3.  1 (correct)     1 (correct)     1
        4.  0 (wrong)       1 (correct)     2
        """
        item_change_df = pd.read_csv(f"{lesion_path}/{output_filename}_{lesion_layer_name}_item_change.csv",
                                     header=0, dtype=int, index_col=0)
        prop_change_df = pd.read_csv(f'{lesion_path}/{output_filename}_{lesion_layer_name}_prop_change.csv',
                                     header=0,
                                     # dtype=float,
                                     index_col=0)

        if verbose:
            print("\n*******************************************"
                  f"\n{layer_index}. gha layer {gha_layer_number}: {gha_layer_name} \tlesion layer: {lesion_layer_name}"
                  "\n*******************************************")
            # focussed_dict_print(hid_acts_dict[layer_index])
            print(f"\n\thid_acts {gha_layer_name} shape: {hid_acts_df.shape}")
            print(f"\tloaded: {output_filename}_{lesion_layer_name}_item_change.csv: {item_change_df.shape}")

        units_per_layer = len(hid_acts_df.columns)

        print("\n\n\t**** loop through units ****")
        for unit_index, unit in enumerate(hid_acts_df.columns):
            # test_run: only the first three units per layer
            if test_run:
                if unit_index > 2:
                    continue

            # if selected_units != 'all':
            if selected_units is not False:
                if unit not in this_layer_units:
                    # print(f"skipping unit {gha_layer_name} {unit}")
                    continue
                else:
                    print(f"\nrunning unit {gha_layer_name} {unit}")

            # # check unit is in sel_per_unit_dict
            # NOTE(review): `sel_info` is only bound in the new-layout
            # branch above — this would NameError for old_sel_dict runs.
            if unit in sel_info[gha_layer_name].keys():
                if verbose:
                    print("found unit in dict")
            else:
                print("unit not in dict\n!!!!!DEAD RELU!!!!!!!!\n...on to the next unit\n")
                continue

            lesion_layer_and_unit = f"{lesion_layer_name}_{unit}"
            output_layer_and_unit = f"{lesion_layer_name}_{unit}"

            print("\n\n*************\n"
                  f"running layer {layer_index} of {n_layers} ({gha_layer_name}): unit {unit} of {units_per_layer}\n"
                  "************")

            # # make new df with just [item, hid_acts*, class, item_change*] *for this unit
            unit_df = item_change_df[["item", "class", lesion_layer_and_unit]].copy()

            # print(hid_acts_df)
            this_unit_hid_acts = hid_acts_df.loc[:, unit]

            # # check for dead relus
            if sum(np.ravel(this_unit_hid_acts)) == 0.0:
                print("\n\n!!!!!DEAD RELU!!!!!!!!...on to the next unit\n")
                continue

            if verbose:
                print(f"\tnot a dead unit, hid acts sum: {sum(np.ravel(this_unit_hid_acts)):.2f}")

            unit_df.insert(loc=1, column='hid_acts', value=this_unit_hid_acts)
            unit_df = unit_df.rename(index=str, columns={lesion_layer_and_unit: 'item_change'})

            if verbose is True:
                print(f"\n\tall items - unit_df: {unit_df.shape}")

            # # remove rows where network failed originally and after lesioning this unit - uninteresting
            old_df_length = len(unit_df)
            unit_df = unit_df.loc[unit_df['item_change'] != 0]
            if verbose is True:
                n_fail_fail = old_df_length - len(unit_df)
                print(f"\n\t{n_fail_fail} fail-fail items removed - new shape unit_df: {unit_df.shape}")

            # # get items per class based on their occurences in the dataframe.
            # # this includes fail-pass, pass-pass and pass-fail - but not fail-fail
            no_fail_fail_ipc = unit_df['class'].value_counts(sort=False)
            df_ipc = dict()
            for i in range(n_cats):
                df_ipc[i] = no_fail_fail_ipc[i]

            # # # calculate the proportion of items that failed.
            # # # this is not the same as total_unit_change (which takes into account fail-pass as well as pass-fail)
            # df_ipc_total = sum(df_ipc.values())
            # l_failed_df = unit_df[(unit_df['item_change'] == -1)]
            # l_failed_count = len(l_failed_df)
            #
            # print("\tdf_ipc_total: {}".format(df_ipc_total))
            # print("\tl_failed_count: {}".format(l_failed_count))

            # # getting max_class_drop
            # drops are negative, so min/idxmin give the biggest drop
            max_class_drop_col = prop_change_df.loc[:, str(unit)]
            total_unit_change = max_class_drop_col['total']
            max_class_drop_col = max_class_drop_col.drop(labels=['total'])
            max_class_drop_val = max_class_drop_col.min()
            max_drop_class = max_class_drop_col.idxmin()
            print(f"\n\tmax_class_drop_val: {max_class_drop_val}\n"
                  f"\tmax_drop_class: {max_drop_class}\n"
                  f"\ttotal_unit_change: {total_unit_change}")

            # # getting best sel measure (max_informed)
            main_sel_name = 'informedness'

            # # includes if statement since some units have not score (dead relu?)
            if old_sel_dict:
                main_sel_val = sel_dict['sel_info'][gha_layer_name][unit]['max']['informed']
                main_sel_class = int(sel_dict['sel_info'][gha_layer_name][unit]['max']['c_informed'])
            else:
                # print(sel_info[gha_layer_name][unit]['max'])
                main_sel_val = sel_info[gha_layer_name][unit]['max']['max_informed']
                main_sel_class = int(sel_info[gha_layer_name][unit]['max']['max_informed_c'])

            print(f"\tmain_sel_val: {main_sel_val}")
            print(f"\tmain_sel_class: {main_sel_class}")

            # # coi stands for Class Of Interest
            # # if doing oneVsAll I need to have a coi measure. (e.g., clas with max informed 'c_informed')
            if plot_type is "OneVsAll":
                # # get coi
                if coi_measure == 'max_class_drop':
                    coi = max_drop_class
                elif coi_measure == 'c_informed':
                    coi = main_sel_class
                else:
                    coi = int(sel_dict['sel_info'][gha_layer_name][unit]['max'][coi_measure])
                print(f"\n\tcoi: {coi} ({coi_measure})")

                # # get new class labels based on coi, OneVsAll
                all_classes_col = unit_df['class'].astype(int)
                # NOTE(review): `x is coi` compares int identity, not
                # equality — works only for small interned ints; should be ==.
                one_v_all_class_list = [1 if x is coi else 0 for x in all_classes_col]
                print(f"\tall_classes_col: {len(all_classes_col)} one_v_all_class_list: {len(one_v_all_class_list)}")

                if 'OneVsAll' not in list(unit_df):
                    print("\tadding 'OneVsAll'")
                    print("\treplacing all classes with 'OneVsAll'class column")
                    unit_df['class'] = one_v_all_class_list

            min_act = unit_df['hid_acts'].min()
            if normed_acts:
                # only sensible for non-negative activations (e.g. relu)
                if min_act >= 0.0:
                    print("\nnormalising activations")
                    this_unit_normed_acts = np.divide(unit_df['hid_acts'], unit_df['hid_acts'].max())
                    unit_df['normed'] = this_unit_normed_acts
                    print(unit_df.head())
                else:
                    print("\ncan not do normed acts on this unit")
                    # NOTE: flips the parameter off for all later units too
                    normed_acts = False

            # # # did any items fail that were previously at zero
            print(f"\n\tsmallest activation on this layer was {min_act}")
            l_failed_df = unit_df[(unit_df['item_change'] == -1)]
            l_failed_df = l_failed_df.sort_values(by=['hid_acts'])
            min_failed_act = l_failed_df['hid_acts'].min()
            print(f"\n\tsmallest activation of items that failed after lesioning was {min_failed_act}")
            if min_failed_act == 0.0:
                fail_zero_df = l_failed_df.loc[l_failed_df['hid_acts'] == 0.0]
                fail_zero_count = len(fail_zero_df.index)
                print(f"\n\tfail_zero_df: {fail_zero_count} items\n\t{fail_zero_df.head()}")
                fail_zero_df.to_csv(f"{output_filename}_{gha_layer_name}_{unit}_fail_zero_df.csv", index=False)

            # # make plot of class changes
            # if plot_fails is True:
            if plot_class_change:
                # per-class accuracy change for this unit (last row = 'total')
                class_prop_change = prop_change_df.iloc[:-1, unit].to_list()
                print(f"\n\tclass_prop_change: {class_prop_change}")

                # change scale if there are big changes
                class_change_x_min = -.5
                if min(class_prop_change) < class_change_x_min:
                    class_change_x_min = min(class_prop_change)

                class_change_x_max = .1
                if max(class_prop_change) > class_change_x_max:
                    class_change_x_max = max(class_prop_change)

                class_change_curve = sns.barplot(x=class_prop_change, y=class_labels, orient='h')
                class_change_curve.set_xlim([class_change_x_min, class_change_x_max])
                class_change_curve.axvline(0, color="k", clip_on=False)
                plt.subplots_adjust(left=0.15)  # just to fit the label 'automobile' on

                print(f'\nclass num: {class_prop_change.index(min(class_prop_change))}, '
                      f'class label: {class_labels[class_prop_change.index(min(class_prop_change))]}, '
                      f'class_val: {min(class_prop_change):.2f}'
                      )
                plt.title(f"{lesion_layer_and_unit}\n"
                          f"total change: {total_unit_change:.2f} "
                          f"max_class ({class_labels[class_prop_change.index(min(class_prop_change))]}): "
                          f"{min(class_prop_change):.2f}")
                plt.savefig(f"{output_filename}_{output_layer_and_unit}_class_prop_change.png")
                if test_run:
                    plt.show()
                plt.close()

            # # # # # # # # # # # #
            # # raincloud plots # #
            # # # # # # # # # # # #

            # # plot title
            if plot_fails:
                title = f"Layer: {gha_layer_name} Unit: {unit}\nmax_class_drop: {max_class_drop_val:.2f} " \
                        f"({max_drop_class}), total change: {total_unit_change:.2f}\n" \
                        f"{main_sel_name}: {main_sel_val:.2f} ({main_sel_class})"
                if plot_type == "OneVsAll":
                    # NOTE(review): the last continuation string below has no
                    # f-prefix, so its braces print literally — likely a bug.
                    title = f"Layer: {gha_layer_name} Unit: {unit} class: {coi}\n" \
                            f"max_class_drop: {max_class_drop_val:.2f} ({max_drop_class}), " \
                            f"total change: {total_unit_change:.2f}" \
                            "\n{main_sel_name}: {main_sel_val:.2f} ({main_sel_class})"
            else:
                title = f"Layer: {gha_layer_name} Unit: {unit}\n" \
                        f"{main_sel_name}: {main_sel_val:.2f} ({main_sel_class})"
                if plot_type == "OneVsAll":
                    title = f"Layer: {gha_layer_name} Unit: {unit} class: {coi}\n" \
                            f"{main_sel_name}: {main_sel_val:.2f} ({main_sel_class})"

            print(f"\ntitle:\n{title}")

            # # # load main dataframe
            raincloud_data = unit_df
            # print(raincloud_data.head())
            plot_y_vals = "class"
            # use_this_ipc = items_per_cat
            use_this_ipc = df_ipc

            if plot_type is "OneVsAll":
                print("\t\n\n\nUSE OneVsAll mode")
                # NOTE: rebinds the loop-wide n_cats; persists for later units
                n_cats = 2
                items_per_coi = use_this_ipc[coi]
                other_items = sum(df_ipc.values()) - items_per_coi
                use_this_ipc = {0: other_items, 1: items_per_coi}
                print(f"\tcoi {coi}, items_per_cat {items_per_cat}")

            # # # choose colours
            use_colours = 'tab10'
            if 10 < n_cats < 21:
                use_colours = 'tab20'
            elif n_cats > 20:
                print("\tERROR - more classes than colours!?!?!?")
            sns.set_palette(palette=use_colours, n_colors=n_cats)

            # Make MULTI plot: narrow zeros bar chart + wide raincloud
            fig = plt.figure(figsize=(10, 5))
            gs = gridspec.GridSpec(1, 2, width_ratios=[1, 4])
            zeros_axis = plt.subplot(gs[0])
            rain_axis = plt.subplot(gs[1])

            # # # # # # # # # # # # #
            # # make zeros plot # # #
            # # # # # # # # # # # # #

            # 1. get biggest class size (for max val of plot)
            max_class_size = max(use_this_ipc.values())
            print(f"\tmax_class_size: {max_class_size}")

            # 2. get list or dict of zeros per class
            # counts are negated so the bars point left, into [-100, 0]
            zeros_dict = {}
            for k in range(n_cats):
                if plot_type is "OneVsAll":
                    plot_names = ["all_others", f"class_{coi}"]
                    this_name = plot_names[k]
                    this_class = unit_df.loc[unit_df['OneVsAll'] == k]
                    zero_count = 0 - (this_class['hid_acts'] == 0).sum()
                    zeros_dict[this_name] = zero_count
                else:
                    this_class = unit_df.loc[unit_df['class'] == k]
                    zero_count = 0 - (this_class['hid_acts'] == 0).sum()
                    zeros_dict[k] = zero_count

            # zd_classes = list(zeros_dict.keys())
            # zd_classes = list(lesion_dict['data_info']['cat_names'].values())
            zd_zero_count = list(zeros_dict.values())
            if verbose:
                print(f"\n\tzeros_dict:{zeros_dict.values()}, use_this_ipc:{use_this_ipc.values()}")
            zd_zero_perc = [x / y * 100 if y else 0 for x, y in zip(zeros_dict.values(), use_this_ipc.values())]

            zd_data = {"class": class_labels, "zero_count": zd_zero_count, "zero_perc": zd_zero_perc}
            zeros_dict_df = pd.DataFrame.from_dict(data=zd_data)

            # zero_plot
            sns.catplot(x="zero_perc", y="class", data=zeros_dict_df, kind="bar", orient='h', ax=zeros_axis)
            zeros_axis.set_xlabel("% at zero (height reflects n items)")
            zeros_axis.set_xlim([-100, 0])

            # # set width of bar to reflect class size
            new_heights = [x / max_class_size for x in use_this_ipc.values()]
            print(f"\tuse_this_ipc: {use_this_ipc}\n\tnew_heights: {new_heights}")

            # def change_height(zeros_axis, new_value):
            patch_count = 0
            for patch in zeros_axis.patches:
                current_height = patch.get_height()
                make_new_height = current_height * new_heights[patch_count]
                diff = current_height - make_new_height
                if new_heights[patch_count] < 1.0:
                    # print("{}. current_height {}, new_height: {}".format(patch, current_height, make_new_height))
                    # # change the bar height
                    patch.set_height(make_new_height)
                    # # recenter the bar
                    patch.set_y(patch.get_y() + diff * .65)
                patch_count = patch_count + 1

            zeros_axis.set_xticklabels(['100', '50', ''])
            # zeros_axis.xaxis.set_major_locator(plt.MaxNLocator(1))
            plt.close()

            # # # # # # # #
            # # raincloud #
            # # # # # # # #
            data_values = "hid_acts"  # float
            if normed_acts:
                data_values = 'normed'
            data_class = plot_y_vals  # class
            orientation = "h"  # orientation

            # cloud_plot
            pt.half_violinplot(data=raincloud_data, bw=.1, linewidth=.5, cut=0., width=1, inner=None,
                               orient=orientation, x=data_values, y=data_class, scale="count")  # scale="area"

            """# # rain_drops - plot 3 separate plots so that they are interesting items are ontop of pass-pass
            # # zorder is order in which items are printed
            # # item_change:  1 ('grey') passed before and after lesioning
            # #              -1 ('red') passed in full model but failed when lesioned
            # #               2 ('green') failed in full model but passed in lesioning"""
            fail_palette = {1: "silver", -1: "red", 2: "green", 0: "orange"}

            # # separate rain drops for pass pass,
            pass_pass_df = unit_df[(unit_df['item_change'] == 1)]
            pass_pass_drops = sns.stripplot(data=pass_pass_df, x=data_values, y=data_class, jitter=1, zorder=1,
                                            size=2, orient=orientation)  # , hue='item_change', palette=fail_palette)

            if plot_fails is True:
                '''I'm not using this atm, but if I want to plot items that originally failed and later passed'''
                # # separate raindrop for fail pass
                # fail_pass_df = unit_df[(unit_df['item_change'] == 2)]
                # if not fail_pass_df.empty:
                #     fail_pass_drops = sns.stripplot(data=fail_pass_df, x=data_values, y=data_class, jitter=1,
                #                                     zorder=3, size=4, orient=orientation, hue='item_change',
                #                                     palette=fail_palette, edgecolor='gray', linewidth=.4, marker='s',
                #                                     label='')

                # # separate raindrops for pass fail
                if not l_failed_df.empty:
                    # pass_fail_drops
                    sns.stripplot(data=l_failed_df, x=data_values, y=data_class, jitter=1, zorder=4,
                                  size=4, orient=orientation, hue='item_change', palette=fail_palette,
                                  edgecolor='white', linewidth=.4, marker='s')

            # box_plot
            sns.boxplot(data=raincloud_data, color="gray", orient=orientation, width=.15, x=data_values,
                        y=data_class, zorder=2, showbox=False,
                        # boxprops={'facecolor': 'none', "zorder": 2},
                        showfliers=False, showcaps=False,
                        whiskerprops={'linewidth': .01, "zorder": 2}, saturation=1,
                        # showwhiskers=False,
                        medianprops={'linewidth': .01, "zorder": 2},
                        showmeans=True,
                        meanprops={"marker": "*", "markerfacecolor": "white", "markeredgecolor": "black"}
                        )

            # # Finalize the figure
            rain_axis.set_xlabel("Unit activations")
            if normed_acts:
                rain_axis.set_xlabel("Unit activations (normalised)")

            # new_legend_text = ['l_passed', 'l_failed']
            new_legend_text = ['l_failed']
            leg = pass_pass_drops.axes.get_legend()
            if leg:
                # in here because leg is None if no items changed when this unit was lesioned
                for t, l in zip(leg.texts, new_legend_text):
                    t.set_text(l)

            # # hid ticks and labels from rainplot
            plt.setp(rain_axis.get_yticklabels(), visible=False)
            rain_axis.axes.get_yaxis().set_visible(False)

            # # put plots together
            max_activation = max(this_unit_hid_acts)
            min_activation = min(this_unit_hid_acts)
            if normed_acts:
                max_activation = max(this_unit_normed_acts)
                min_activation = min(this_unit_normed_acts)
            max_x_val = max_activation * 1.05

            # look up this layer's activation function in the model info
            layer_act_func = None
            for k, v in lesion_dict['model_info']['layers']['hid_layers'].items():
                if v['name'] == gha_layer_name:
                    layer_act_func = v['act_func']
                    break

            # relu (or all-positive) activations start the x axis at zero
            if layer_act_func in ['relu', 'Relu', 'ReLu']:
                min_x_val = 0
            elif min_activation > 0.0:
                min_x_val = 0
            else:
                min_x_val = min_activation

            rain_axis.set_xlim([min_x_val, max_x_val])
            rain_axis.get_shared_y_axes().join(zeros_axis, rain_axis)
            fig.subplots_adjust(wspace=0)

            fig.suptitle(title, fontsize=12).set_position([.5, 1.0])  # .set_bbox([])

            # # # add y axis back onto rainplot
            plt.axvline(x=min_x_val, linestyle="-", color='black', )

            # # add marker for max informedness
            if 'info' in coi_measure:
                if old_sel_dict:
                    normed_info_thr = sel_dict['sel_info'][gha_layer_name][unit]['max']['thr_informed']
                else:
                    print(sel_info[gha_layer_name][unit]['max'])
                    normed_info_thr = sel_info[gha_layer_name][unit]['max']['max_info_thr']

                if normed_acts:
                    best_info_thr = normed_info_thr
                else:
                    # unnormalise it
                    best_info_thr = normed_info_thr * max(this_unit_hid_acts)
                print(f"\tbest_info_thr: {best_info_thr}")

                plt.axvline(x=best_info_thr, linestyle="--", color='grey')

            # sns.despine(right=True)

            if plot_type is "OneVsAll":
                plt.savefig(f"{output_filename}_{gha_layer_name}_{unit}_cat{coi}_raincloud.png")
            else:
                plt.savefig(f"{output_filename}_{gha_layer_name}_{unit}_raincloud.png")

            if test_run:
                plt.show()

            print("\n\tplot finished\n")

            # # clear for next round
            plt.close()
            # # plt.show()

    print("End of script")
def generate_figure(data_in, column, path_output):
    """Raincloud-style figure (half violin + boxplot + stripplot) of ``column``,
    split by scanner manufacturer, saved as a PNG under ``path_output``.

    Parameters
    ----------
    data_in : pandas.DataFrame
        Must contain ``column`` and a 'Manufacturer' column used as hue.
        (Assumed — TODO confirm schema against caller.)
    column : str
        Metric to plot. 'CNR_single/t' gets a special scale factor and label.
    path_output : str
        Output directory for the saved figure.

    Relies on module-level ``np``, ``plt``, ``sns``, ``pt`` (ptitprince), ``os``
    and the project helper ``adjust_box_widths``.
    """
    # All observations share one dummy x position; 'hue' does the grouping.
    dx = np.ones(len(data_in[column]))
    dy = column
    hue = "Manufacturer"
    pal = ["#1E90FF", "#32CD32", "#FF0000"]
    f, ax = plt.subplots(figsize=(4, 6))
    # CNR_single/t values are rescaled by 100 for readability — presumably a
    # unit convention; verify against the rest of the pipeline.
    if column == 'CNR_single/t':
        coeff = 100
    else:
        coeff = 1
    ax = pt.half_violinplot(x=dx, y=dy, data=data_in * coeff, hue=hue,
                            palette=pal, bw=.4, cut=0., linewidth=0.,
                            scale="area", width=.8, inner=None, orient="v",
                            dodge=False, alpha=.4, offset=0.5)
    ax = sns.boxplot(x=dx, y=dy, data=data_in * coeff, hue=hue, color="black",
                     palette=pal, showcaps=True,
                     boxprops={
                         'facecolor': 'none',
                         "zorder": 10
                     },
                     showmeans=True,
                     meanprops={
                         "marker": "^",
                         "markerfacecolor": "black",
                         "markeredgecolor": "black",
                         "markersize": "8"
                     },
                     showfliers=True,
                     whiskerprops={
                         'linewidth': 2,
                         "zorder": 10
                     },
                     saturation=1, orient="v", dodge=True)
    ax = sns.stripplot(x=dx, y=dy, data=data_in * coeff, hue=hue, palette=pal,
                       edgecolor="white", size=3, jitter=1, zorder=0,
                       orient="v", dodge=True)
    plt.xlim([-1, 0.5])
    handles, labels = ax.get_legend_handles_labels()
    # The code below doesn't work (the label for CNR is "GEGEGEGEGEGEG...") so i need to hard-code the labels (because
    # I don't have time to dig further).
    # _ = plt.legend(handles[0:len(labels) // 3], labels[0:len(labels) // 3],
    #                bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,
    #                title=str(hue))
    # NOTE(review): legend labels are hard-coded; assumes hue category order is
    # Philips, Siemens, GE — confirm against data_in.
    _ = plt.legend(handles[0:3], ['Philips', 'Siemens', 'GE'],
                   bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,
                   title=str(hue))
    f.gca().invert_xaxis()
    adjust_box_widths(f, 0.6)  # project helper; narrows drawn boxes
    # special hack
    if column == 'CNR_single/t':
        plt.xlabel('CNR_single/√t')
        fname_out = os.path.join(path_output, 'figure_CNR_single_t')
    else:
        plt.xlabel(column)
        fname_out = os.path.join(path_output, 'figure_' + column)
    # remove ylabel
    plt.ylabel('')
    # hide xtick
    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.savefig(fname_out, bbox_inches='tight', dpi=300)
def compare_datasets():
    """Train models on several feature-set variants (POSIX, POSIX+runtime,
    MPI-IO, POSIX+MPI-IO, POSIX+Cobalt) and plot the per-variant absolute-error
    distributions for training and test sets to figures/figure_2.pdf.

    Relies on module-level helpers ``load_dataset`` and ``train_on_split`` and
    on ``np``, ``pd``, ``plt``, ``sns``, ``pt`` (ptitprince).
    """
    def get_runtime_results():
        # POSIX-only features vs POSIX features plus runtime.
        df, features = load_dataset(module="POSIX", remove_runtime=True)
        df_rt, features_rt = load_dataset(module="POSIX", remove_runtime=False)
        posix_train, posix_test = train_on_split(df, features)
        runtime_train, runtime_test = train_on_split(df_rt, features_rt)
        return posix_train, posix_test, runtime_train, runtime_test

    def get_mpiio_results():
        # MPI-IO-only features vs the union of POSIX and MPI-IO features,
        # both evaluated on the joint ("both") dataset.
        _, features_posix = load_dataset(module="POSIX", remove_runtime=True)
        _, features_mpiio = load_dataset(module="MPIIO", remove_runtime=True)
        df_both, features_both = load_dataset(module="both", remove_runtime=True)
        mpiio_train, mpiio_test = train_on_split(df_both, features_mpiio)
        both_train, both_test = train_on_split(df_both, features_both)
        return mpiio_train, mpiio_test, both_train, both_test

    def get_cobalt_results(multiple_allocations='ignore'):
        # Augment POSIX features with Cobalt scheduler information
        # (log10 node count and log10 core hours), joined on job id.
        df, features = load_dataset(module="POSIX", remove_runtime=True)
        cobalt = pd.read_csv("data/cobalt_theta_2017_2020.csv")
        features_cobalt = features + [
            "NODES_USED_LOG10", "USED_CORE_HOURS_LOG10"
        ]
        if multiple_allocations == 'ignore':
            # Drop jobs that appear more than once (ambiguous allocations).
            alloc_sizes = df.groupby(["JOBID"]).size()
            df = df[df.JOBID.isin(alloc_sizes[alloc_sizes == 1].index)]
        df = pd.merge(df, cobalt, left_on=["JOBID"], right_on=["COBALT_JOBID"])
        df["NODES_USED_LOG10"] = np.log10(df.NODES_USED)
        df["USED_CORE_HOURS_LOG10"] = np.log10(df.USED_CORE_HOURS)
        cobalt_train, cobalt_test = train_on_split(df, features_cobalt)
        return cobalt_train, cobalt_test

    posix_train, posix_test, runtime_train, runtime_test = get_runtime_results(
    )
    mpiio_train, mpiio_test, both_train, both_test = get_mpiio_results()
    cobalt_train, cobalt_test = get_cobalt_results()
    # posix_train, posix_test, runtime_train, runtime_test, mpiio_train, mpiio_test, both_train, both_test, cobalt_train, cobalt_test
    # Long-format frame: one row per error sample, tagged with its train/test
    # split and with the feature-set variant that produced it.
    results = pd.DataFrame({
        'error':
        np.concatenate([
            posix_train, posix_test, runtime_train, runtime_test, mpiio_train,
            mpiio_test, both_train, both_test, cobalt_train, cobalt_test
        ]),
        'set':
        ['train'] * posix_train.shape[0] + ['test'] * posix_test.shape[0] +
        ['train'] * runtime_train.shape[0] + ['test'] * runtime_test.shape[0] +
        ['train'] * mpiio_train.shape[0] + ['test'] * mpiio_test.shape[0] +
        ['train'] * both_train.shape[0] + ['test'] * both_test.shape[0] +
        ['train'] * cobalt_train.shape[0] + ['test'] * cobalt_test.shape[0],
        'type':
        ['posix'] * (posix_train.shape[0] + posix_test.shape[0]) +
        ['runtime'] * (runtime_train.shape[0] + runtime_test.shape[0]) +
        ['mpiio'] * (mpiio_train.shape[0] + mpiio_test.shape[0]) +
        ['both'] * (both_train.shape[0] + both_test.shape[0]) +
        ['cobalt'] * (cobalt_train.shape[0] + cobalt_test.shape[0])
    })
    # Problems with log axes make me have to modify the data
    results.error = np.log10(results.error)
    #
    # Plotting
    #
    dx = 'type'
    dy = 'error'
    pal = "tab10"
    ort = 'v'
    # Clip errors >= 2x so the violins stay readable.
    df = results[results.error < np.log10(2)]
    plt.figure(figsize=(1.65 * 2, 2))

    def sample_type_equally(df, sample):
        """
        Given multiple types, makes sure each has equal representation
        """
        types = set(df.type)
        dfs = []
        for type in types:
            dfs.append(df[df.type == type].sample(sample))
        return pd.concat(dfs)
    #
    # Top figure
    #
    ax = plt.subplot(211)
    pt.half_violinplot(x=dx, y=dy, data=df[df.set == 'train'], palette=pal,
                       bw=.1, cut=0., scale="width", width=1., inner=None,
                       orient=ort, linewidth=0.8, offset=0.2)
    # Strip layer is subsampled to 500 points per variant to limit overdraw.
    sns.stripplot(x=dx, y=dy,
                  data=sample_type_equally(df[df.set == 'train'], 500),
                  palette=pal, edgecolor="white", size=1, jitter=1, zorder=1,
                  orient=ort, alpha=0.5)
    sns.boxplot(x=dx, y=dy, data=df[df.set == 'train'], color="black",
                width=.2, zorder=10, showcaps=True,
                boxprops={
                    'facecolor': 'none',
                    "zorder": 10
                },
                showfliers=True,
                whiskerprops={
                    'linewidth': 1,
                    "zorder": 10
                },
                saturation=1, orient=ort, fliersize=0, linewidth=1)
    # Ticks are placed in log10 space but labelled as multiplicative factors.
    yticks = [1, 1.2, 1.5, 2]
    ax.set_yticks(np.log10(yticks))
    ax.set_yticklabels([r"{:.2f} $\times$".format(y) for y in yticks])
    plt.ylabel("Absolute Error")
    plt.xlim(-0.8, 4.3)
    plt.xticks([], [])
    plt.xlabel("")
    ax.set_title("Training set")
    #
    # Second figure
    #
    plt.subplot(212)
    ax = pt.half_violinplot(x=dx, y=dy, data=df[df.set == 'test'], palette=pal,
                            bw=.1, cut=0., scale="width", width=1., inner=None,
                            orient=ort, linewidth=0.8, offset=0.2)
    ax = sns.stripplot(x=dx, y=dy,
                       data=sample_type_equally(df[df.set == 'test'], 500),
                       palette=pal, edgecolor="white", size=1, jitter=1,
                       zorder=1, orient=ort, alpha=0.5)
    ax = sns.boxplot(x=dx, y=dy, data=df[df.set == 'test'], color="black",
                     width=.2, zorder=10, showcaps=True,
                     boxprops={
                         'facecolor': 'none',
                         "zorder": 10
                     },
                     showfliers=True,
                     whiskerprops={
                         'linewidth': 1,
                         "zorder": 10
                     },
                     saturation=1, orient=ort, fliersize=0, linewidth=1)
    yticks = [1, 1.2, 1.5, 2]
    ax.set_yticks(np.log10(yticks))
    ax.set_yticklabels([r"{:.2f} $\times$".format(y) for y in yticks])
    plt.ylabel("Absolute Error")
    plt.xlim(-0.8, 4.3)
    # plt.xticks(range(5), ["POSIX", "POSIX+runtime", "MPI-IO", "POSIX+MPI-IO", "POSIX+Cobalt"], rotation=30)
    # Each xtick label embeds the test-set median error, converted back from
    # log10 space.  NOTE(review): .median() here is taken over the filtered
    # DataFrame's numeric columns — works because 'error' is the only numeric
    # column; confirm if columns are added later.
    plt.xticks(np.arange(5) - 0.5, [
        "POSIX\nTest set median={:.2f}$\\times$".format(
            10**results[(results.set == 'test')
                        & (results.type == 'posix')].median().item()),
        "POSIX+runtime\nTest set median={:.2f}$\\times$".format(
            10**results[(results.set == 'test')
                        & (results.type == 'runtime')].median().item()),
        "MPI-IO\nTest set median={:.2f}$\\times$".format(
            10**results[(results.set == 'test')
                        & (results.type == 'mpiio')].median().item()),
        "POSIX+MPI-IO\nTest set median={:.2f}$\\times$".format(
            10**results[(results.set == 'test')
                        & (results.type == 'both')].median().item()),
        "POSIX+Cobalt\nTest set median={:.2f}$\\times$".format(
            10**results[(results.set == 'test')
                        & (results.type == 'cobalt')].median().item())
    ], rotation=30, ha='right')
    plt.xlabel("")
    ax.set_title("Test set")
    plt.savefig("figures/figure_2.pdf", dpi=600, bbox_inches='tight')
ylabels = [ylabel_map[i] for i in order] # %% Plot with sns.plotting_context("paper", font_scale=1.3): fig, ax = plt.subplots(figsize=(LETTER_WIDTH_INCH, 5)) palette = "colorblind" ptitprince.half_violinplot( x="device", order=order, y="latency_ms", hue="os", hue_order=["Linux", "Windows"], data=df, ax=ax, palette=palette, split=True, inner=None, offset=0.3, ) for i in ax.collections: i.set_alpha(0.65) sns.stripplot( x="device", order=order, y="latency_ms", hue="os",
def plot_violins(df, count):
    """Raincloud plot (half violin + strip + box) of prediction error for the
    ``count`` most common applications in ``df``.

    Writes the figure to figures/figure_1b.pdf.  Uses module-level ``plt``,
    ``sns``, ``np``, ``Counter`` and the project helper ``sample_apps_equally``.
    """
    import ptitprince as pt

    # Keep only the most frequent applications.
    frequent = [app for app, _ in Counter(df.app).most_common()[:count]]
    df = df[df.app.isin(frequent)]

    x_col = 'app'
    y_col = 'error'
    palette = "tab10"
    orientation = "v"

    plt.figure(figsize=(1.65, 1.65))
    ax = pt.half_violinplot(x=x_col, y=y_col, data=df, palette=palette, bw=.1,
                            cut=0., scale="width", width=1., inner=None,
                            orient=orientation, linewidth=0.8, offset=0.2)
    # Equal subsample per app so no application dominates the strip layer.
    ax = sns.stripplot(x=x_col, y=y_col, data=sample_apps_equally(df, 1000),
                       palette=palette, edgecolor="white", size=1, jitter=1,
                       zorder=1, orient=orientation, alpha=0.5)
    ax = sns.boxplot(x=x_col, y=y_col, data=df, color="black", width=.2,
                     zorder=10, showcaps=True,
                     boxprops={'facecolor': 'none', "zorder": 10},
                     showfliers=True,
                     whiskerprops={'linewidth': 1, "zorder": 10},
                     saturation=1, orient=orientation, fliersize=0,
                     linewidth=1)

    # Symmetric multiplicative ticks on the log10 error axis.
    tick_ratios = [1 / 2, 1 / 1.5, 1 / 1.2, 1, 1.2, 1.5, 2]
    ax.set_yticks(np.log10(tick_ratios))
    ax.set_yticklabels([r"{:.2f} $\times$".format(y) for y in tick_ratios])
    ax.set_xticklabels(["Writer", "pw.x", "HACC", "IOR", "QB"], rotation=30)
    ax.set_xlabel("Application")
    ax.set_ylabel("Error")
    ax.set_axisbelow(True)
    plt.xlim(-0.8, 4.3)
    plt.ylim(np.log10(1 / 2), np.log10(2))
    plt.savefig("figures/figure_1b.pdf", dpi=600, bbox_inches='tight')
# ################################################################ # Parameters plot ################################################################## fig, ax = plt.subplots(1, 4, figsize=(8, 5)) pal = sns.color_palette("deep", 5) labels = [r'$\omega_2$', r'$\beta_0$', r'$\beta_1$', r'$\zeta$'] for idx, var in enumerate(['om_2', 'be0', 'be1', 'ze']): data_param = data.groupby(['sub'])[var].mean().reset_index() dplot = data_param.melt(['sub']) pt.half_violinplot(x='variable', y="value", data=dplot, inner=None, jitter=True, color=pal[idx], lwidth=0, width=0.6, offset=0.17, cut=1, ax=ax[idx], linewidth=1, alpha=0.6, zorder=19) sns.stripplot(x='variable', y="value", data=dplot, jitter=0.08, ax=ax[idx], linewidth=1, alpha=0.6, color=pal[idx], zorder=1) sns.boxplot(x='variable', y="value", data=dplot, color=pal[idx], whis=np.inf, linewidth=1, ax=ax[idx], width=0.1, boxprops={"zorder": 10, 'alpha': 0.5}, whiskerprops={'zorder': 10, 'alpha': 1}, medianprops={'zorder': 11, 'alpha': 0.5}) ax[idx].set_xticklabels([labels[idx]], fontsize=param['labelfontsize']) if idx == 0: ax[idx].set_ylabel('Value', fontsize=param['labelfontsize']) else: ax[idx].set_ylabel('') ax[idx].set_xlabel('')
# Build long-format labels: one feature-set tag ('fc', 'fs_5dis', 'fs_6dis')
# per ICC value, repeated once per column of fs_ref, plus a single reference
# tag ('Rep_50') covering all three feature sets.
fs = []
for k in range(1):
    for i in ['fc', 'fs_5dis', 'fs_6dis']:
        fs += [i for j in range(fs_ref.shape[1])]
ref = []
for k in ['Rep_50']:
    ref += [k for i in range(3 * fs_ref.shape[1])]
# Assumes icc / icc_msr / icc_mse each have 3 * fs_ref.shape[1] entries,
# ordered fc, fs_5dis, fs_6dis — TODO confirm against the code that built them.
data = pd.DataFrame({'icc': icc, 'icc_msr': icc_msr, 'icc_mse': icc_mse,
                     'fs': fs, 'ref': ref})
# --- Edge-wise ICC MSr: point estimates + raw points + half violins ---
plt.figure(figsize=(20, 10))
sns.pointplot(x="ref", y="icc_msr", data=data, hue='fs', dodge=0.53,
              join=False, palette="dark", markers="d", scale=.75, ci='sd',
              capsize=0.07)
sns.stripplot(x="ref", y="icc_msr", data=data, hue='fs', size=3, dodge=0.45,
              alpha=0.05).set_title('Edge-wise ICC MSr')
pt.half_violinplot(x="ref", y="icc_msr", data=data, hue='fs', scale="area",
                   inner=None, offset=0.03, saturation=0.5)
plt.legend(ncol=2)
plt.savefig(plotd + 'icc_msr.png')  ######
plt.close()
# --- Edge-wise ICC MSe: same layout, annotated with Welch t-tests ---
plt.figure(figsize=(20, 10))
sns.pointplot(x="ref", y="icc_mse", data=data, hue='fs', dodge=0.53,
              join=False, palette="dark", markers="d", scale=.75, ci='sd',
              capsize=0.07)
sns.stripplot(x="ref", y="icc_mse", data=data, hue='fs', size=3, dodge=0.45,
              alpha=0.05).set_title('Edge-wise ICC MSe')
pt.half_violinplot(x="ref", y="icc_mse", data=data, hue='fs', scale="area",
                   inner=None, offset=0.03, saturation=0.5)
# Welch t-tests comparing the first third of icc_mse (first feature set)
# against the second and third thirds.
t1, p1 = stats.ttest_ind(icc_mse[0:int(len(icc_mse) / 3)],
                         icc_mse[int(len(icc_mse) / 3):int(len(icc_mse) / 3 * 2)],
                         nan_policy='omit', equal_var=False)
t2, p2 = stats.ttest_ind(icc_mse[0:int(len(icc_mse) / 3)],
                         icc_mse[int(len(icc_mse) / 3 * 2):],
                         nan_policy='omit', equal_var=False)
plt.text(-0.15, 0, 'T: ' + str(round(t1, 5)) + '\n' + 'P: ' + str(round(p1, 5)), fontsize=18)
plt.text(0.15, 0, 'T: ' + str(round(t2, 5)) + '\n' + 'P: ' + str(round(p2, 5)), fontsize=18)
plt.legend(ncol=2)
width_viol=.7, ax=ax4, orient=ort, alpha=.65, dodge=True, move=.2) ax4.get_legend().remove() ax4.set(yscale="log") fig.savefig(f"{figureRoot}/Pro.AnnotationsVsOthers.all.pdf") ax = pt.half_violinplot(x=dx, y="SignalVEH.+", data=df, palette=pal, bw=.2, cut=0., scale="area", width=.6, inner=None, orient=ort) ax = sns.stripplot(x=dx, y=dy, data=df, palette=pal, edgecolor="white", size=3, jitter=1, zorder=0, orient=ort)
def plot_violins(df):
    """Side-by-side raincloud plots of baseline vs normalized model error on
    the test set and the 2020 set; saves figures/figure_8.pdf.

    Parameters
    ----------
    df : pandas.DataFrame
        Expected columns: 'set' ('train'/'test'/'2020'), 'type'
        ('baseline'/'normalized'), 'error' in log10 space.
        (Inferred from usage — TODO confirm against caller.)

    Uses module-level ``plt``, ``sns``, ``np``, ``pt`` (ptitprince).
    """
    # Drop training rows and clip errors >= 10x so the violins stay readable.
    df = df[df.set != "train"]
    df = df[df.error < np.log10(10)]
    dx = 'type'
    dy = 'error'
    pal = "tab10"
    ort = 'v'
    #
    # First graph
    #
    plt.figure(figsize=(1.65 * 2, 1.4))
    plt.subplots_adjust(wspace=0.05, left=0, right=1)
    plt.subplot(121)
    ax = pt.half_violinplot(x=dx, y=dy, data=df[df.set == 'test'], palette=pal,
                            bw=.1, cut=0., scale="width", width=1., inner=None,
                            orient=ort, linewidth=0.8, offset=0.2)
    # Strip layer subsampled to 2000 points to limit overdraw.
    ax = sns.stripplot(x=dx, y=dy, data=df[df.set == 'test'].sample(2000),
                       palette=pal, edgecolor="white", size=1, jitter=1,
                       zorder=1, orient=ort, alpha=0.5)
    ax = sns.boxplot(x=dx, y=dy, data=df[df.set == 'test'], color="black",
                     width=.2, zorder=10, showcaps=True,
                     boxprops={
                         'facecolor': 'none',
                         "zorder": 10
                     },
                     showfliers=True,
                     whiskerprops={
                         'linewidth': 1,
                         "zorder": 10
                     },
                     saturation=1, orient=ort, fliersize=0, linewidth=1)
    plt.xlim(-0.8, 1.3)
    # Embed the median error (converted back from log10) in each tick label.
    ax.set_xticklabels([
        "Baseline\nMedian={:.2f}$\\times$".format(
            10**df[(df.set == 'test')
                   & (df.type == 'baseline')].error.median()),
        "Normalized\nMedian={:.2f}$\\times$".format(
            10**df[(df.set == 'test')
                   & (df.type == 'normalized')].error.median())
    ])
    ax.set_xlabel("")
    # Multiplicative tick labels on the log10 axis (reused by second subplot).
    yticks = [1, 1.2, 1.5, 2, 5, 10]
    ax.set_yticks(np.log10(yticks))
    ax.set_yticklabels([r"{:.2f} $\times$".format(y) for y in yticks])
    plt.ylabel("Absolute Error")
    ax.set_title("Test set")
    #
    # Second graph
    #
    plt.subplot(122)
    ax = pt.half_violinplot(x=dx, y=dy, data=df[df.set == '2020'], palette=pal,
                            bw=.1, cut=0., scale="width", width=1., inner=None,
                            orient=ort, linewidth=0.8, offset=0.2)
    ax = sns.stripplot(x=dx, y=dy, data=df[df.set == '2020'].sample(2000),
                       palette=pal, edgecolor="white", size=1, jitter=1,
                       zorder=1, orient=ort, alpha=0.5)
    ax = sns.boxplot(x=dx, y=dy, data=df[df.set == '2020'], color="black",
                     width=.2, zorder=10, showcaps=True,
                     boxprops={
                         'facecolor': 'none',
                         "zorder": 10
                     },
                     showfliers=True,
                     whiskerprops={
                         'linewidth': 1,
                         "zorder": 10
                     },
                     saturation=1, orient=ort, fliersize=0, linewidth=1)
    ax.set_xticklabels([
        "Baseline\nMedian={:.2f}$\\times$".format(
            10**df[(df.set == '2020')
                   & (df.type == 'baseline')].error.median()),
        "Normalized\nMedian={:.2f}$\\times$".format(
            10**df[(df.set == '2020')
                   & (df.type == 'normalized')].error.median())
    ])
    ax.set_xlabel("")
    plt.xlim(-0.8, 1.3)
    # Same tick positions as the left panel, but labels hidden (shared axis).
    ax.set_yticks(np.log10(yticks))
    ax.set_yticklabels(["" for y in yticks])
    plt.ylabel("")
    ax.set_title("2020 set")
    plt.savefig("figures/figure_8.pdf", dpi=600, bbox_inches='tight')