def compare_posteriors_with_different_data(cfg_name, model, t, replace_indices, params) -> None:
    """
    Overlay histograms of posterior samples obtained with different replace indices.

    One panel is drawn per entry of ``params``; inside each panel there is one
    histogram per entry of ``replace_indices``, built from the samples that
    ``results_utils.get_posterior_samples`` returns for ``iter_range=(t, t + 1)``.

    Args:
        cfg_name: experiment configuration name (forwarded to ``results_utils``).
        model: model name (forwarded to ``results_utils``).
        t: training iteration at which samples are taken.
        replace_indices: iterable of replace indices to compare.
        params: sequence of parameter names (strings); one subplot each.

    The figure is left as the current pyplot figure; nothing is written to disk.
    """
    plt.clf()
    plt.close()
    # squeeze=False + ravel keeps ``axarr`` a 1-D array even when there is a
    # single parameter (plt.subplots would otherwise return a bare Axes and
    # ``axarr[i]`` below would fail).
    fig, axarr = plt.subplots(nrows=1, ncols=len(params), squeeze=False)
    axarr = axarr.ravel()
    colours = cm.viridis(np.linspace(0.2, 0.8, len(replace_indices)))

    for j, replace_index in enumerate(replace_indices):
        for i, p in enumerate(params):
            samples = results_utils.get_posterior_samples(
                cfg_name,
                iter_range=(t, t + 1),
                model=model,
                replace_index=replace_index,
                params=[p])
            sns.distplot(samples,
                         ax=axarr[i],
                         color=to_hex(colours[j]),
                         label=str(replace_index),
                         kde=False)

    for i, p in enumerate(params):
        axarr[i].set_xlabel('parameter ' + p)
    axarr[0].set_title('iteration ' + str(t))
    axarr[-1].legend()
    vis_utils.beautify_axes(axarr)
    return
def weight_posterior(cfg_name, model, replace_indices='random', t=500, param='#0', n_bins=25):
    """
    Plot the distribution of a single weight at iteration ``t``.

    Args:
        cfg_name: experiment configuration name (forwarded to ``results_utils``).
        model: model name (forwarded to ``results_utils``).
        replace_indices: ``'random'`` to draw two distinct replace indices at
            random from those that occur more than twice in the available
            results, a single integer, or a list of replace indices.
        t: training iteration; samples are loaded for ``iter_range=(t, t + 1)``.
        param: name of the column (weight) to plot.
        n_bins: number of histogram bins.

    One histogram + KDE is drawn per replace index (samples are loaded with
    ``seeds='all'``). The legend is omitted when a single integer index was
    given. The figure is saved under ``PLOTS_DIR`` as
    ``weight_posterior_{cfg_name}_{param}`` with ``.png`` and ``.pdf`` suffixes.
    """
    iter_range = (t, t + 1)
    nolegend = False

    if replace_indices == 'random':
        print('Picking two *random* replace indices for this setting...')
        df = results_utils.get_available_results(cfg_name, model)
        replace_counts = df['replace'].value_counts()
        replaces = replace_counts[replace_counts > 2].index.values
        replace_indices = np.random.choice(replaces, 2, replace=False).tolist()
    elif isinstance(replace_indices, (int, np.integer)):
        # numpy integers (e.g. taken from a DataFrame) are accepted as well
        replace_indices = [replace_indices]
        nolegend = True
    assert isinstance(replace_indices, list)

    # Set up the plot
    fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=(4, 2.5))

    # now load the data!
    for replace_index in replace_indices:
        df = results_utils.get_posterior_samples(cfg_name,
                                                 iter_range,
                                                 model,
                                                 replace_index=replace_index,
                                                 params=[param],
                                                 seeds='all')
        # the backslash is escaped explicitly: the label text is unchanged
        # ("D\<index>") but no invalid-escape warning is raised
        sns.distplot(df[param],
                     ax=axarr,
                     label=f'D\\{replace_index}',
                     kde=True,
                     bins=n_bins,
                     norm_hist=True)

    axarr.set_xlabel('weight ' + param)
    if not nolegend:
        axarr.legend()
    axarr.set_ylabel('# runs')
    vis_utils.beautify_axes(np.array([axarr]))
    plt.tight_layout()

    plot_identifier = f'weight_posterior_{cfg_name}_{param}'
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.png'))
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.pdf'))
    return
def plot_sigmas_distribution(model, cfg_names=None, ylim=None) -> None:
    """
    Plot, per dataset, the histogram of variability estimates (sigmas).

    Args:
        model: ``'logistic'`` selects ``em.lr_convergence_points`` and the
            title 'Logistic regression'; anything else selects
            ``em.nn_convergence_points`` and the title 'Neural network'.
        cfg_names: datasets to include; defaults to every key of the selected
            convergence-point mapping.
        ylim: optional y-axis limits passed to ``set_ylim``.

    For every dataset the sigmas are loaded at that dataset's convergence
    point (``diffinit=True``), NaNs are dropped and the minimum is subtracted
    before plotting. The min / quartiles / max of the (unshifted) sigmas are
    printed. The figure is saved under ``PLOTS_DIR`` as
    ``stability_sigmas_dist_{model}`` (.png and .pdf) and then closed.
    """
    if model == 'logistic':
        convergence_points = em.lr_convergence_points
        title = 'Logistic regression'
    else:
        convergence_points = em.nn_convergence_points
        title = 'Neural network'

    if cfg_names is None:
        cfg_names = convergence_points.keys()

    fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=(3, 3))

    for ds in cfg_names:
        t = convergence_points[ds]
        # now just the sigmas distribution
        all_sigmas = dr.Sigmas(ds, model, t).load(diffinit=True)['sigmas']
        # lose the nans
        all_sigmas = all_sigmas[~np.isnan(all_sigmas)]
        min_sigma = np.nanmin(all_sigmas)
        sns.distplot(all_sigmas - min_sigma,
                     ax=axarr,
                     norm_hist=True,
                     label=em.dataset_names[ds],
                     color=to_rgba(em.dataset_colours[ds]),
                     kde=False,
                     bins=50)
        # The cut points are fractions in [0, 1], so np.quantile is the right
        # call; np.percentile expects values in [0, 100] and would report the
        # 0th..1st percentile instead of the quartiles.
        quantiles = np.quantile(all_sigmas, [0, 0.25, 0.5, 0.75, 1])
        print(ds, len(all_sigmas))
        print(quantiles)

    axarr.set_xlabel('variability estimate')
    axarr.set_ylabel('density')
    axarr.set_title(title)
    axarr.legend()
    if ylim is not None:
        axarr.set_ylim(ylim)
    axarr.set_xlim(0, None)
    vis_utils.beautify_axes(np.array([axarr]))
    plt.tight_layout()

    plot_identifier = f'stability_sigmas_dist_{model}'
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.png'))
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.pdf'))
    plt.clf()
    plt.close()
    return
def multivariate_normal_test_vis(df, logscale: bool = False) -> None:
    """
    Plot p-values of three normality tests against sample size, per dimension.

    ``df`` must provide the columns ``n``, ``d``, ``pval_diagonal_gauss``,
    ``pval_nondiag_gauss`` and ``pval_laplace``. For every distinct ``d`` the
    mean p-value per ``n`` is drawn as a line with a +/- one standard
    deviation band, one test per row of the figure. A dashed red line marks
    p = 0.05 in each panel. With ``logscale`` the y-axis is logarithmic and
    limited to [1e-5, 1], otherwise to [0, 1].

    The figure is written to ``PLOTS_DIR`` as ``multivar_test`` (with a
    ``_log`` suffix when ``logscale`` is true) in .png and .pdf, then closed.
    """
    pval_columns = ['pval_diagonal_gauss', 'pval_nondiag_gauss', 'pval_laplace']

    fig, axarr = plt.subplots(nrows=3, ncols=1, sharex=True)
    axarr[-1].set_xlabel('N')

    ds = df['d'].unique()
    colours = cm.viridis(np.linspace(0, 1, len(ds)))

    for colour, d in zip(colours, ds):
        per_dimension = df[df['d'] == d]
        for ax, column in zip(axarr, pval_columns):
            grouped = per_dimension[[column, 'n']].groupby('n')
            centre = grouped.mean()
            spread = grouped.std()
            ax.plot(centre.index, centre.values[:, 0], color=colour, label=d)
            lower = (centre - spread).values[:, 0]
            upper = (centre + spread).values[:, 0]
            ax.fill_between(centre.index, lower, upper, color=colour, alpha=0.1)

    # one shared colourbar that maps line colour to dimension
    norm = plt.Normalize(vmin=min(ds), vmax=max(ds))
    mappable = plt.cm.ScalarMappable(norm, cmap='viridis')
    fig.colorbar(mappable, ax=axarr, label='dimension', drawedges=False, ticks=ds)

    for ax, ylabel in zip(axarr, ['pval\nMVN', 'pval\nMVNd', 'pval\nlaplace']):
        ax.set_ylabel(ylabel)

    for ax in axarr:
        ax.axhline(y=0.05, ls='--', color='red', alpha=0.5)
        if logscale:
            ax.set_yscale('log')
            ax.set_ylim(1e-5, 1)
        else:
            ax.set_ylim(0, 1)

    vis_utils.beautify_axes(axarr)

    # bool * str: '_log' when logscale is true, '' otherwise
    suffix = "_log" * logscale
    for extension in ('.png', '.pdf'):
        plt.savefig(PLOTS_DIR / f'multivar_test{suffix}{extension}')
    plt.clf()
    plt.close()
    return
def visualise_weight_trajectory(cfg_name, identifiers, df=None, save=True, iter_range=(None, None), params=['#4', '#2'], include_optimum=False, include_autocorrelation=False, diffinit=False) -> None:
    """
    Plot the trajectory of selected weights over training for several runs.

    Args:
        cfg_name: experiment configuration name.
        identifiers: iterable of mappings with keys ``'model'``, ``'replace'``
            and ``'seed'``; one trajectory is drawn per identifier.
        df: unused; kept for backward compatibility of the signature.
        save: when true the figure is saved under ``PLOTS_DIR`` (.png and
            .pdf) and closed; otherwise it is left open.
        iter_range: forwarded to ``load_weights``.
        params: names of the weight columns to plot (one row each). ``None``
            selects four random columns when more than six are available,
            otherwise all columns except the first. The default list is never
            mutated here.
        include_optimum: draw a dashed line at the value returned by
            ``data_utils.solve_with_linear_regression`` for each parameter.
        include_autocorrelation: add a second column showing the
            autocorrelation (lags 0..99) of every trajectory.
        diffinit: forwarded to ``results_utils.ExperimentIdentifier``.
    """
    df_list = []
    for identifier in identifiers:
        experiment = results_utils.ExperimentIdentifier(
            cfg_name, identifier['model'], identifier['replace'],
            identifier['seed'], diffinit)
        df_list.append(experiment.load_weights(iter_range=iter_range, params=params))

    colors = cm.viridis(np.linspace(0.2, 0.8, len(df_list)))
    # NOTE(review): if the identifiers are plain dicts this joins their *keys*
    # (giving every run the same label) rather than their values - confirm
    # what is intended. Left unchanged because it also feeds the file name.
    labels = [':'.join(x) for x in identifiers]

    if params is None:
        # use the first loaded frame consistently instead of whichever frame
        # happened to be loaded last
        available = df_list[0].columns
        if len(available) > 6:
            print('WARNING: No parameters indicated, choosing randomly...')
            params = np.random.choice(available[1:], 4, replace=False)
        else:
            print('WARNING: No parameters indicated, selecting all')
            params = available[1:]

    for p in params:
        for df in df_list:
            assert p in df.columns

    if include_optimum:
        # hack!
        optimum, _ = data_utils.solve_with_linear_regression(cfg_name)

    ncols = 2 if include_autocorrelation else 1
    # squeeze=False keeps a 2-D grid of axes for every combination of
    # len(params) and ncols, so a single parameter no longer breaks indexing
    fig, grid = plt.subplots(nrows=len(params),
                             ncols=ncols,
                             sharex='col',
                             figsize=(4 * ncols, 1.5 * len(params) + 1),
                             squeeze=False)
    firstcol = grid[:, 0]

    n_lags = 100
    for k, df in enumerate(df_list):
        color = to_hex(colors[k])
        for i, p in enumerate(params):
            firstcol[i].scatter(df['t'], df[p], c=color, alpha=1, s=4, label=labels[k])
            firstcol[i].plot(df['t'], df[p], c=color, alpha=0.75, label='_nolegend_')
            firstcol[i].set_ylabel('param: ' + str(p))
            if include_optimum:
                # parameter names look like '#<index>'; strip the leading char
                firstcol[i].axhline(y=optimum[int(p[1:])], ls='--', color='red', alpha=0.5)
        firstcol[0].set_title('weight trajectory')
        firstcol[-1].set_xlabel('training steps')
        firstcol[0].legend()

        if include_autocorrelation:
            grid[0, 1].set_title('autocorrelation of weight trajectory')
            for i, p in enumerate(params):
                autocorr = np.array([df[p].autocorr(lag=lag) for lag in range(n_lags)])
                grid[i, 1].plot(range(n_lags), autocorr, alpha=0.5, color=color)
                grid[i, 1].scatter(range(n_lags), autocorr, s=4, zorder=2, color=color)
                grid[i, 1].set_ylabel(p)
                grid[i, 1].axhline(y=0, ls='--', alpha=0.5, color='black')
            grid[-1, 1].set_xlabel('lag')

    # hand beautify_axes the same shape it received before: the full grid for
    # the two-column layout, a 1-D array of axes otherwise
    vis_utils.beautify_axes(grid if include_autocorrelation else firstcol)
    plt.tight_layout()

    if save:
        plot_identifier = f'weights_{cfg_name}_{"_".join(labels)}'
        plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.png'))
        plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.pdf'))
        plt.clf()
        plt.close()
    return
def visualise_trace(cfg_names, models, replaces, seeds, privacys, save=True, include_batches=False, iter_range=(None, None), include_convergence=True, diffinit=False, convergence_tolerance=3, include_vali=True, labels=None) -> None:
    """
    Plot the recorded training metrics over training steps for one or more runs.

    The experiments are described by ``vis_utils.process_identifiers`` applied
    to ``cfg_names``, ``models``, ``replaces``, ``seeds`` and ``privacys``.
    Every metric column of the loss table (all columns from the third onward)
    gets its own row; rows with ``minibatch_id == 'ALL'`` are drawn as the
    training curve and rows with ``'VALI'`` as the dashed validation curve.

    Args:
        save: when true the figure is saved under ``PLOTS_DIR`` (.png and
            .pdf) and closed; otherwise it is left open.
        include_batches: also draw per-minibatch values; forced off when more
            than one experiment is shown.
        iter_range: forwarded to ``load_loss``.
        include_convergence: mark each run's convergence point (computed on
            the first metric) with a dashed vertical line.
        diffinit: forwarded to the experiment identifier / convergence lookup.
        convergence_tolerance: forwarded as ``tolerance`` to the convergence
            lookup.
        include_vali: draw the validation curve.
        labels: optional legend labels, one per identifier.

    Experiments for which ``load_loss`` returns ``False`` are reported and
    skipped. If nothing could be loaded the function prints an error and
    returns ``False`` (despite the ``None`` annotation).
    """
    identifiers = vis_utils.process_identifiers(cfg_names, models, replaces, seeds, privacys)
    print(identifiers)

    if len(identifiers) > 1:
        print(
            'WARNING: When more than one experiment is included, we turn off visualisation of batches to avoid cluttering the plot'
        )
        include_batches = False

    if labels is None:
        labels = [
            f'{x["cfg_name"]}-{x["model"]}-{x["replace"]}-{x["seed"]}'
            for x in identifiers
        ]
    else:
        assert len(labels) == len(identifiers)

    # Keep identifiers, labels and loss tables aligned and drop experiments
    # without data; previously a placeholder was stored for them and the
    # plotting code below failed on it.
    kept_identifiers = []
    kept_labels = []
    loss_list = []
    for identifier, label in zip(identifiers, labels):
        experiment = results_utils.ExperimentIdentifier(
            identifier['cfg_name'],
            identifier['model'],
            identifier['replace'],
            identifier['seed'],
            data_privacy=identifier['data_privacy'],
            diffinit=diffinit)
        df_loss = experiment.load_loss(iter_range=iter_range)
        if df_loss is False:
            print('No fit data available for identifier:', identifier)
            continue
        kept_identifiers.append(identifier)
        kept_labels.append(label)
        loss_list.append(df_loss)

    if not loss_list:
        print('Error: no valid data')
        return False

    identifiers = kept_identifiers
    labels = kept_labels

    if include_batches:
        minibatch_ids = loss_list[0]['minibatch_id'].unique()
        colormap = dict(
            zip(minibatch_ids,
                cm.viridis(np.linspace(0, 1, len(minibatch_ids)))))
    colours = cm.viridis(np.linspace(0.2, 0.8, len(loss_list)))

    # what metrics were recorded for this run?
    metrics = loss_list[0].columns[2:]
    print('Visualising trace of', identifiers, 'with metrics', metrics)

    nrows = len(metrics)
    fig, axarr = plt.subplots(nrows=nrows, ncols=1, sharex='col', figsize=(4, 3.2))
    if nrows == 1:
        axarr = np.array([axarr])

    for j, df in enumerate(loss_list):
        # this is just for the purpose of plotting the overall, not batches
        df_train = df.loc[df['minibatch_id'] == 'ALL', :]
        df_vali = df.loc[df['minibatch_id'] == 'VALI', :]

        # plot all
        for i, metric in enumerate(metrics):
            axarr[i].scatter(df_train['t'], df_train[metric],
                             s=4, color=colours[j], zorder=2,
                             label='_nolegend_', alpha=0.5)
            axarr[i].plot(df_train['t'], df_train[metric],
                          alpha=0.25, color=colours[j], zorder=2,
                          label=labels[j])
            if include_vali:
                axarr[i].plot(df_vali['t'], df_vali[metric],
                              ls='--', color=colours[j], zorder=2,
                              label='_nolegend_', alpha=0.5)
            axarr[i].legend()

            if metric in ['mse']:
                axarr[i].set_yscale('log')
            axarr[i].set_ylabel(re.sub('_', '\n', metric))

            if include_batches:
                axarr[i].scatter(df['t'], df[metric],
                                 c=[colormap[x] for x in df['minibatch_id']],
                                 s=4, alpha=0.2, zorder=0)
                for minibatch_idx in df['minibatch_id'].unique():
                    df_temp = df.loc[df['minibatch_id'] == minibatch_idx, :]
                    axarr[i].plot(df_temp['t'], df_temp[metric],
                                  c=colormap[minibatch_idx], alpha=0.1, zorder=0)

    if include_convergence:
        for j, identifier in enumerate(identifiers):
            convergence_point = dr.find_convergence_point_for_single_experiment(
                identifier['cfg_name'],
                identifier['model'],
                identifier['replace'],
                identifier['seed'],
                diffinit,
                tolerance=convergence_tolerance,
                metric=metrics[0],
                data_privacy=identifier['data_privacy'])
            print('Convergence point:', convergence_point)
            for ax in axarr:
                ax.axvline(x=convergence_point, ls='--', color=colours[j])

    axarr[-1].set_xlabel('training steps')
    vis_utils.beautify_axes(axarr)
    plt.tight_layout()

    if save:
        plot_label = '__'.join(
            [f'r{x["replace"]}-s{x["seed"]}' for x in identifiers])
        # the file name uses the configuration of the last plotted experiment
        cfg_name = identifiers[-1]['cfg_name']
        plot_identifier = f'trace_{cfg_name}_{plot_label}'
        plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.png'))
        plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.pdf'))
        plt.clf()
        plt.close()
    return
def fit_pval_histogram(what, cfg_name, model, t, n_experiments=3, diffinit=False, xlim=None, seed=1) -> None:
    """
    Histogram of log p-values (``norm_p`` at iteration ``t``), one per parameter.

    Args:
        what: ``'weights'`` or ``'gradients'``; selects which data is loaded.
        cfg_name: experiment configuration name.
        model: model name.
        t: training iteration; data is loaded for ``iter_range=(t, t + 1)``.
        n_experiments: number of replace indices sampled (without
            replacement) from the available results.
        diffinit: forwarded to the result lookups.
        xlim: optional x-axis limits; log p-values not above ``xlim[0]`` are
            dropped before plotting.
        seed: seed of the experiment used when loading gradients.

    For every sampled replace index and every parameter column (all columns
    from the third onward), ``dr.estimate_statistics_through_training`` is
    called and its ``norm_p`` value at ``t`` is collected. Vertical lines
    mark ``log(0.05)`` and ``log(0.05 / n_params)``. The figure is saved under
    ``PLOTS_DIR`` as ``pval_histogram_{cfg_name}_{model}_{what}`` (.png, .pdf).

    If fewer than 50 parameters are found the function prints an error and
    returns ``False`` (despite the ``None`` annotation).
    """
    assert what in ['weights', 'gradients']
    # set some stuff up
    iter_range = (t, t + 1)
    pval_colour = '#b237c4'

    # sample experiments
    available = results_utils.get_available_results(cfg_name, model, diffinit=diffinit)
    replace_indices = available['replace'].unique()
    replace_indices = np.random.choice(replace_indices, n_experiments, replace=False)
    print('Looking at replace indices...', replace_indices)

    all_pvals = []
    for replace_index in replace_indices:
        experiment = results_utils.ExperimentIdentifier(
            cfg_name, model, replace_index, seed, diffinit)

        if what == 'gradients':
            print('Loading gradients...')
            df = experiment.load_gradients(noise=True, iter_range=iter_range, params=None)
        elif what == 'weights':
            df = results_utils.get_posterior_samples(
                cfg_name,
                iter_range=iter_range,
                model=model,
                replace_index=replace_index,
                params=None,
                seeds='all')
        second_col = df.columns[1]

        params = df.columns[2:]
        n_params = len(params)
        print(n_params)
        if n_params < 50:
            print(
                'ERROR: Insufficient parameters for this kind of visualisation, please try something else'
            )
            return False
        print('Identified', n_params, 'parameters, proceeding with analysis')

        p_vals = np.zeros(shape=(n_params))
        for j, p in enumerate(params):
            print('getting fit for parameter', p)
            df_fit = dr.estimate_statistics_through_training(
                what=what,
                cfg_name=None,
                model=None,
                replace_index=None,
                seed=None,
                df=df.loc[:, ['t', second_col, p]],
                params=None,
                iter_range=None)
            p_vals[j] = df_fit.loc[t, 'norm_p']
            del df_fit
        all_pvals.append(np.log(p_vals))

    log_pvals = np.concatenate(all_pvals)

    if xlim is not None:
        # remove values below the limit
        number_below = (log_pvals < xlim[0]).sum()
        print('There are', number_below, 'p-values below the limit of', xlim[0])
        log_pvals = log_pvals[log_pvals > xlim[0]]
        print('Remaining pvals:', len(log_pvals))

    # The figure is only created once the data is known to be usable, so the
    # early return above no longer leaves an empty figure behind.
    fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=(3.5, 2.1))
    # at least one bin, even when very few values survive the xlim filter
    n_bins = max(1, min(100, int(len(log_pvals) * 0.25)))
    sns.distplot(log_pvals,
                 kde=True,
                 bins=n_bins,
                 ax=axarr,
                 color=pval_colour,
                 norm_hist=True)
    axarr.axvline(x=np.log(0.05), ls=':', label='p = 0.05', color='black', alpha=0.75)
    axarr.axvline(x=np.log(0.05 / n_params),
                  ls='--',
                  label='p = 0.05/' + str(n_params),
                  color='black',
                  alpha=0.75)
    axarr.legend()
    axarr.set_xlabel(r'$\log(p)$')
    axarr.set_ylabel('density')
    if xlim is not None:
        axarr.set_xlim(xlim)
    else:
        axarr.set_xlim((None, 0.01))
    vis_utils.beautify_axes(np.array([axarr]))
    plt.tight_layout()

    plot_identifier = f'pval_histogram_{cfg_name}_{model}_{what}'
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.png'))
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.pdf'))
    return
def weight_evolution(cfg_name, model, n_seeds=50, replace_indices=None, iter_range=(None, None), params=['#4', '#2'], diffinit=False, aggregate=False):
    """
    Plot how selected weights evolve over training across random seeds.

    Args:
        cfg_name: experiment configuration name.
        model: model name.
        n_seeds: number of seeds requested from ``get_posterior_samples``.
        replace_indices: sequence of replace indices. Must be given; with
            ``aggregate=False`` it must contain exactly one entry.
        iter_range: forwarded to ``get_posterior_samples``.
        params: weight columns to plot, one row each. The default list is
            never mutated here.
        diffinit: forwarded to ``get_posterior_samples``.
        aggregate: when true, draw per replace index the mean line, a
            mean +/- std band and a min-max band over seeds; when false, draw
            one line per seed for the single replace index.

    The figure is saved under ``PLOTS_DIR`` as
    ``weight_trajectory_{cfg_name}.{model}`` with .png and .pdf suffixes.
    """
    plt.clf()
    plt.close()
    # squeeze=False keeps the axes indexable by row even for a single
    # parameter; the first (only) column is the familiar 1-D array of axes
    fig, grid = plt.subplots(nrows=len(params),
                             ncols=1,
                             sharex=True,
                             figsize=(4, 3),
                             squeeze=False)
    axarr = grid[:, 0]

    if aggregate:
        colours = cm.get_cmap('Set1')(np.linspace(0.2, 0.8, len(replace_indices)))
        assert n_seeds > 1
        for i, replace_index in enumerate(replace_indices):
            vary_S = results_utils.get_posterior_samples(
                cfg_name,
                iter_range,
                model,
                replace_index=replace_index,
                params=params,
                seeds='all',
                n_seeds=n_seeds,
                diffinit=diffinit)
            by_step = vary_S.groupby('t')
            vary_S_min = by_step.min()
            vary_S_std = by_step.std()
            vary_S_max = by_step.max()
            vary_S_mean = by_step.mean()

            for j, p in enumerate(params):
                # full range over seeds
                axarr[j].fill_between(vary_S_min.index,
                                      vary_S_min[p],
                                      vary_S_max[p],
                                      alpha=0.1,
                                      color=colours[i],
                                      label='_legend_')
                # one standard deviation around the mean
                axarr[j].fill_between(vary_S_mean.index,
                                      vary_S_mean[p] - vary_S_std[p],
                                      vary_S_mean[p] + vary_S_std[p],
                                      alpha=0.1,
                                      color=colours[i],
                                      label='_nolegend_',
                                      linestyle='--')
                axarr[j].plot(vary_S_min.index,
                              vary_S_mean[p],
                              color=colours[i],
                              alpha=0.7,
                              label='D -' + str(replace_index))
                axarr[j].set_ylabel('weight ' + p)
    else:
        colours = cm.get_cmap('plasma')(np.linspace(0.2, 0.8, n_seeds))
        assert len(replace_indices) == 1
        replace_index = replace_indices[0]
        vary_S = results_utils.get_posterior_samples(
            cfg_name,
            iter_range,
            model,
            replace_index=replace_index,
            params=params,
            seeds='all',
            n_seeds=n_seeds,
            diffinit=diffinit)
        seeds = vary_S['seed'].unique()

        for i, s in enumerate(seeds):
            vary_Ss = vary_S.loc[vary_S['seed'] == s, :]
            for j, p in enumerate(params):
                axarr[j].plot(vary_Ss['t'],
                              vary_Ss[p],
                              color=colours[i],
                              label='seed ' + str(s),
                              alpha=0.8)
                if i == 0:
                    # label each row once; p[1:] drops the leading '#'
                    axarr[j].set_ylabel(r'$\mathbf{w}^{' + p[1:] + '}$')

    axarr[-1].set_xlabel('training steps')
    vis_utils.beautify_axes(np.array([axarr]))
    plt.tight_layout()

    plot_identifier = f'weight_trajectory_{cfg_name}.{model}'
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.png'))
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.pdf'))
    return
def qq_plot(what: str, cfg_name: str, model: str, replace_index: int, seed: int, times=[50], params='random') -> None:
    """
    Draw a histogram and a normal Q-Q plot of gradient noise or weights.

    Args:
        what: ``'gradients'`` (noise of one experiment, loaded via
            ``load_gradients(noise=True)``) or ``'weights'`` (posterior
            samples from ``results_utils.get_posterior_samples``).
        cfg_name: experiment configuration name.
        model: model name.
        replace_index: replace index of the experiment.
        seed: seed of the experiment (used for the gradient case).
        times: iterations to show, one colour each. The default list is never
            mutated here.
        params: list of parameter names, or ``'random'`` to pick one
            parameter column at random.

    The figure is saved under ``PLOTS_DIR`` as
    ``qq_{what}_{cfg_name}_{model}_{params joined by '_'}`` (.png and .pdf).
    If no data can be loaded the function prints an error and returns
    ``False`` (despite the ``None`` annotation).
    """
    plt.clf()
    plt.close()
    assert what in ['gradients', 'weights']
    if what == 'weights':
        print('Looking at weights, this means we consider all seeds!')

    colours = cm.viridis(np.linspace(0.2, 0.8, len(times)))
    experiment = results_utils.ExperimentIdentifier(cfg_name, model, replace_index, seed)
    iter_range = (min(times), max(times) + 1)

    # For a random choice everything is loaded first (params=None) and one
    # column is selected afterwards.
    pick_random = params == 'random'
    load_params = None if pick_random else params
    if what == 'gradients':
        df = experiment.load_gradients(noise=True,
                                       params=load_params,
                                       iter_range=iter_range)
    else:
        df = results_utils.get_posterior_samples(
            cfg_name,
            model=model,
            replace_index=replace_index,
            iter_range=iter_range,
            params=load_params)

    # Check for missing data *before* touching df.columns; previously the
    # random branch accessed the columns first and this guard could not fire.
    if df is False:
        print('ERROR: No data available')
        return False

    if pick_random:
        params = np.random.choice(df.columns[2:], 1)
        print('picking random parameter', params)
        first_two_cols = df.columns[:2].tolist()
        df = df.loc[:, first_two_cols + list(params)]

    fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(7, 3.5))

    for i, t in enumerate(times):
        df_t = df.loc[df['t'] == t, :]
        # all parameter columns (third onward) pooled into one sample
        X = df_t.iloc[:, 2:].values.flatten()
        print('number of samples:', X.shape[0])
        sns.distplot(X,
                     ax=axarr[0],
                     kde=False,
                     color=to_hex(colours[i]),
                     label=str(t))
        sm.qqplot(X,
                  line='45',
                  fit=True,
                  ax=axarr[1],
                  c=colours[i],
                  alpha=0.5,
                  label=str(t))

    plt.suptitle('cfg_name: ' + cfg_name + ', model:' + model + ',' + what)
    axarr[0].legend()
    axarr[1].legend()
    axarr[0].set_xlabel('parameter:' + '.'.join(params))
    vis_utils.beautify_axes(axarr)
    plt.tight_layout()

    plot_identifier = f'qq_{what}_{cfg_name}_{model}_{"_".join(params)}'
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.png'))
    plt.savefig((PLOTS_DIR / plot_identifier).with_suffix('.pdf'))
    return
def overlay_pval_plot(model='logistic', xlim=None, n_experiments=50, cfg_names=None, ylim=None) -> None:
    """
    Overlay the log p-value distributions of several datasets in one figure.

    For each dataset the log p-values and parameter count are obtained from
    ``vis_utils.fit_pval_histogram`` (weights, evaluated at that dataset's
    convergence point). A dashed line at ``log(0.05 / (n_params *
    n_experiments))`` is added once per distinct parameter count, and a
    dotted line at ``log(0.05)`` once overall.

    ``cfg_names`` defaults to every key of ``em.dataset_colours``. The figure
    is saved under ``FIGS_DIR`` as ``pval_histogram_{plot_label}_{model}``
    (.png and .pdf), where ``plot_label`` is ``'_'`` by default or the joined
    dataset names followed by ``'_'``.
    """
    what = 'weights'
    figsize = (3.7, 3.05)

    is_logistic = model == 'logistic'
    convergence_points = em.lr_convergence_points if is_logistic else em.nn_convergence_points
    title = 'Logistic regression' if is_logistic else 'Neural network'

    if cfg_names is None:
        cfg_names = em.dataset_colours.keys()
        plot_label = '_'
    else:
        plot_label = '_'.join(cfg_names) + '_'

    fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=figsize)

    # parameter counts whose corrected threshold has been drawn already
    thresholds_drawn = set()
    for ds in cfg_names:
        print(ds)
        colour = em.dataset_colours[ds]
        log_pvals, n_params = vis_utils.fit_pval_histogram(
            what=what,
            dataset=ds,
            model=model,
            t=convergence_points[ds],
            n_experiments=n_experiments,
            plot=False)
        n_bins = min(100, int(len(log_pvals) * 0.25))
        sns.distplot(log_pvals,
                     kde=True,
                     bins=n_bins,
                     ax=axarr,
                     color=colour,
                     norm_hist=True,
                     label=em.get_dataset_name(ds),
                     kde_kws={'alpha': 0.6})

        if n_params in thresholds_drawn:
            continue
        n_tests = n_params * n_experiments
        axarr.axvline(x=np.log(0.05 / n_tests),
                      ls='--',
                      label='p = 0.05/' + str(n_tests),
                      color=colour,
                      alpha=0.75)
        thresholds_drawn.add(n_params)

    axarr.axvline(x=np.log(0.05), ls=':', label='p = 0.05', color='black', alpha=0.75)
    axarr.legend()
    axarr.set_xlabel(r'$\log(p)$')
    axarr.set_ylabel('density')
    axarr.set_title(title)

    if ylim is not None:
        axarr.set_ylim(ylim)
    axarr.set_xlim(xlim if xlim is not None else (None, 0.01))

    vis_utils.beautify_axes(np.array([axarr]))
    plt.tight_layout()

    figure_identifier = f'pval_histogram_{plot_label}_{model}'
    for extension in ('.png', '.pdf'):
        plt.savefig((FIGS_DIR / figure_identifier).with_suffix(extension))
    return
def plot_stability_of_estimated_values(cfg_name, model, t) -> None:
    """
    Produce two scatter plots showing how stable the estimated values are.

    The data comes from ``dr.Stability(cfg_name, model, t).load()``:

    1. estimated sigma against the number of random seeds, with a dashed line
       at the value from ``dr.estimate_variability(..., diffinit=True)``;
    2. estimated sensitivity against the number of dataset comparisons
       (log x-axis), with a dashed line at the value from
       ``dr.estimate_sensitivity_empirically(..., num_deltas='max')``.

    Both figures are saved under ``FIGS_DIR`` (.png and .pdf) and closed.
    """
    figsize = (3.5, 2.8)
    size = 6
    title = em.get_dataset_name(cfg_name) + ' (' + em.model_names[model] + ')'

    def finish(ax, figure_identifier):
        # shared tail of both plots: tidy up, save in both formats, close
        vis_utils.beautify_axes(np.array([ax]))
        plt.tight_layout()
        for extension in ('.png', '.pdf'):
            plt.savefig((FIGS_DIR / figure_identifier).with_suffix(extension))
        plt.clf()
        plt.close()

    stability_dict = dr.Stability(cfg_name, model, t).load()

    # --- sigma versus number of seeds ---
    print('Plotting sigma v seeds')
    sigma_v_seed = stability_dict['sigma'][['num_seeds', 'sigma']]
    sigma_colour = em.dp_colours['augment_diffinit']
    fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    axarr.scatter(sigma_v_seed['num_seeds'], sigma_v_seed['sigma'], s=size, c=sigma_colour)
    sigma_we_use = dr.estimate_variability(cfg_name, model, t, diffinit=True)
    axarr.axhline(y=sigma_we_use, ls='--', c=sigma_colour, alpha=0.4)
    axarr.set_xlabel('number of random seeds')
    axarr.set_ylabel(r'estimated $\sigma_i(\mathcal{D})$')
    axarr.set_title(title)
    # 5% headroom around the data and the reference value
    upper_y = 1.05 * max(np.max(sigma_v_seed['sigma']), sigma_we_use)
    lower_y = 0.95 * np.min(sigma_v_seed['sigma'])
    axarr.set_ylim(lower_y, upper_y)
    finish(axarr, f'stability_sigma_v_seeds_{cfg_name}_{model}_t{t}')

    # --- sensitivity versus number of deltas ---
    print('Plotting sens v num deltas')
    sens_v_deltas = stability_dict['sens'][['num_deltas', 'sens']].drop_duplicates()
    sens_colour = em.dp_colours['bolton']
    fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    axarr.scatter(sens_v_deltas['num_deltas'], sens_v_deltas['sens'], s=size, c=sens_colour)
    sens_we_use = dr.estimate_sensitivity_empirically(cfg_name,
                                                      model,
                                                      t,
                                                      num_deltas='max',
                                                      diffinit=True,
                                                      data_privacy='all')
    axarr.axhline(y=sens_we_use, ls='--', c=sens_colour, alpha=0.4)
    axarr.set_xlabel('number of dataset comparisons')
    axarr.set_ylabel('estimated sensitivity')
    axarr.set_ylim(0, None)
    axarr.set_xscale('log')
    axarr.set_title(title)
    finish(axarr, f'stability_sens_v_deltas_{cfg_name}_{model}_t{t}')
    return
def plot_distance_v_time(cfg_name, model, num_pairs='max', convergence_point=None) -> None:
    """
    Plot weight-space distances and sensitivities against training steps.

    This will take precedence over the normal sens_var_over_time one.

    Args:
        cfg_name: experiment configuration name.
        model: model name; for ``'mlp'`` and ``'cnn'`` the theoretical
            sensitivity is dropped, otherwise it is discretised via
            ``test_private_model.discretise_theoretical_sensitivity`` and
            drawn as a dashed line.
        num_pairs: unused; kept for backward compatibility of the signature.
        convergence_point: optional x position of a dashed vertical line.

    Data comes from ``dr.VersusTime(cfg_name, model).load()``. The
    ``diffinit`` distance columns are shown as mean line, mean +/- std band
    and min-max band; the empirical sensitivity as line plus markers. The
    figure is saved under ``FIGS_DIR`` as ``distance_v_time_{cfg_name}``
    (.png and .pdf).
    """
    df = dr.VersusTime(cfg_name, model).load()

    # Get distance (vary seed). Non-inplace dropna avoids modifying a frame
    # that was sliced out of ``df``.
    distance_columns = [x for x in df.columns if 'distance' in x]
    df_distance = df[['t'] + distance_columns].dropna(axis=0)

    # Get sensitivity (vary data); work on an explicit copy so that adding a
    # column below does not trigger pandas' chained-assignment warning.
    df_sens = df[['t', 'theoretical_sensitivity', 'empirical_sensitivity']].copy()
    if model in ['mlp', 'cnn']:
        df_sens = df_sens.drop(columns='theoretical_sensitivity')
    else:
        # discretise the sensitivity
        df_sens['theoretical_sensitivity_discretised'] = [
            test_private_model.discretise_theoretical_sensitivity(cfg_name, model, ts)
            for ts in df_sens['theoretical_sensitivity'].values
        ]
    df_sens = df_sens.dropna(axis=0)

    # Now plot
    size = 6
    fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=(4, 2.1))

    # First distance (vary seed)
    t = df_distance['t']
    which_colours = {
        'fixinit': em.dp_colours['augment'],
        'diffinit': em.dp_colours['augment_diffinit']
    }
    which_labels = {'fixinit': np.nan, 'diffinit': r'$\Delta_V^{vary}$'}
    for which in ['diffinit']:  # not interested in fixinit
        min_dist = df_distance[f'min_{which}_distance']
        mean_dist = df_distance[f'mean_{which}_distance']
        max_dist = df_distance[f'max_{which}_distance']
        std_dist = df_distance[f'std_{which}_distance']
        axarr.plot(t, mean_dist,
                   label=which_labels[which],
                   color=which_colours[which],
                   alpha=0.5)
        axarr.scatter(t, mean_dist,
                      color=which_colours[which],
                      label='_nolegend_',
                      s=size)
        axarr.fill_between(t, mean_dist - std_dist, mean_dist + std_dist,
                           alpha=0.2,
                           label='_nolegend_',
                           color=which_colours[which])
        axarr.fill_between(t, min_dist, max_dist,
                           alpha=0.1,
                           label='_nolegend_',
                           color=which_colours[which])

    # Now sensitivity (vary data)
    t = df_sens['t']
    if 'theoretical_sensitivity_discretised' in df_sens:
        axarr.plot(t, df_sens['theoretical_sensitivity_discretised'],
                   label=r'$\hat{\Delta}_S$',
                   alpha=0.5,
                   c=em.dp_colours['bolton'],
                   ls='--')
    axarr.scatter(t, df_sens['empirical_sensitivity'],
                  label='_nolegend_',
                  s=size,
                  c=em.dp_colours['bolton'])
    axarr.plot(t, df_sens['empirical_sensitivity'],
               label=r'$\hat{\Delta}^*_S$',
               alpha=0.5,
               c=em.dp_colours['bolton'])

    if convergence_point is not None:
        # add a vertical line
        axarr.axvline(x=convergence_point, ls='--', alpha=0.5, color='black')

    # Now save and stuff
    axarr.legend()
    axarr.set_ylabel(r'$\|w - w^\prime\|$')
    axarr.set_xlabel('training steps')
    xmin, _ = axarr.get_xlim()
    # this is a hack for mnist
    axarr.set_xlim(xmin, t.max())
    vis_utils.beautify_axes(np.array([axarr]))
    plt.tight_layout()

    figure_identifier = f'distance_v_time_{cfg_name}'
    plt.savefig((FIGS_DIR / figure_identifier).with_suffix('.png'))
    plt.savefig((FIGS_DIR / figure_identifier).with_suffix('.pdf'))
    return
def plot_delta_histogram(cfg_name: str, model: str, num_deltas='max', t=500, include_bounds=False, xlim=None, ylim=None, data_privacy='all', multivariate=False) -> None:
    """
    Plot the distributions of weight distances under different sources of variation.

    Distances are loaded from ``dr.DeltaHistogram`` twice (``diffinit=False``
    and ``diffinit=True``); NaNs are removed. The ``vary_S`` values of both
    loads are merged into one histogram, ``vary_r`` is shown separately for
    each load, and ``vary_both`` is shown as KDE-only curves for each load.

    Args:
        cfg_name, model, num_deltas, t, data_privacy: forwarded to
            ``dr.DeltaHistogram``.
        include_bounds: add a vertical line at the bound returned by
            ``test_private_model.compute_wu_bound``; asserts
            ``model == 'logistic'``.
        xlim, ylim: optional axis limits.
        multivariate: must be false.

    Raises:
        NotImplementedError: if ``multivariate`` is true.

    The figure is saved under ``FIGS_DIR`` as
    ``delta_histogram_{cfg_name}_{data_privacy}_{model}_t{t}`` (.png, .pdf).
    """
    if multivariate:
        raise NotImplementedError('Multivariate plotting is not implemented')

    def without_nans(values):
        return values[~np.isnan(values)]

    delta_histogram = dr.DeltaHistogram(cfg_name, model, num_deltas, t,
                                        data_privacy, multivariate)
    fixinit_data = delta_histogram.load(diffinit=False)
    diffinit_data = delta_histogram.load(diffinit=True)

    # remove NANs
    vary_both = without_nans(fixinit_data['vary_both'])
    vary_S = without_nans(fixinit_data['vary_S'])
    vary_r = without_nans(fixinit_data['vary_r'])
    vary_both_diffinit = without_nans(diffinit_data['vary_both'])
    vary_S_diffinit = without_nans(diffinit_data['vary_S'])
    vary_r_diffinit = without_nans(diffinit_data['vary_r'])

    # merge vary_S for the different initialisations
    vary_S = np.concatenate([vary_S, vary_S_diffinit])

    plt.clf()
    plt.close()
    fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=(4, 2.1))

    print('Plotting varying S... number of deltas:', vary_S.shape[0])
    sns.distplot(vary_S,
                 ax=axarr,
                 color=em.dp_colours['bolton'],
                 label=r'$\Delta_S$',
                 kde=True,
                 norm_hist=True)

    print('Plotting varying r... number of deltas:', vary_r.shape[0])
    for values, colour_key, label in (
            (vary_r, 'augment', r'$\Delta_V^{fix}$'),
            (vary_r_diffinit, 'augment_diffinit', r'$\Delta_V^{vary}$')):
        sns.distplot(values,
                     ax=axarr,
                     color=em.dp_colours[colour_key],
                     label=label,
                     kde=True,
                     norm_hist=True)

    print('Plotting varying both... number of deltas:', vary_both.shape[0])
    # KDE only, distinguished by line style
    for values, colour_key, label, line_style in (
            (vary_both, 'both', r'$\Delta_{S+V}^{fix}$', {'linestyle': '--'}),
            (vary_both_diffinit, 'both_diffinit', r'$\Delta_{S+V}^{vary}$',
             {'linestyle': ':', 'lw': 2})):
        sns.distplot(values,
                     ax=axarr,
                     color=em.dp_colours[colour_key],
                     label=label,
                     kde=True,
                     hist=False,
                     kde_kws=line_style)

    if include_bounds:
        assert model == 'logistic'
        lipschitz_constant = np.sqrt(2.0)
        _, batch_size, lr, _, N = em.get_experiment_details(cfg_name, model, verbose=True)
        wu_bound = test_private_model.compute_wu_bound(lipschitz_constant,
                                                       t=t,
                                                       N=N,
                                                       batch_size=batch_size,
                                                       eta=lr)
        axarr.axvline(x=wu_bound,
                      ls='--',
                      color=em.dp_colours['bolton'],
                      label=r'$\hat{\Delta}_S$')

    axarr.legend()
    axarr.set_xlabel(r'$\|w - w^\prime\|$')
    axarr.set_ylabel('density')
    if xlim is not None:
        axarr.set_xlim(xlim)
    if ylim is not None:
        axarr.set_ylim(ylim)

    vis_utils.beautify_axes(np.array([axarr]))
    plt.tight_layout()

    figure_identifier = f'delta_histogram_{cfg_name}_{data_privacy}_{model}_t{t}'
    for extension in ('.png', '.pdf'):
        plt.savefig((FIGS_DIR / figure_identifier).with_suffix(extension))
    return