def create_plots(stats_list, keys_to_plot, x_key, groups, result_dir, include_val=True):
    """Plot the grouped mean of each monitored key against ``x_key``.

    For every key only the runs whose series has the longest length are kept
    (the finished runs, or the single longest one). With ``include_val`` set,
    the validation series of each kept run (the key with its last four
    characters replaced by ``'_val'``) is added under the label
    ``'<group>, validation'``, restricted to its non-NaN entries. One PDF per
    key is written to ``result_dir`` and a progress line is printed.

    Args:
        stats_list: Per-run statistics; each element must support
            ``key in s`` and ``s[key].tolist()``.
        keys_to_plot: Keys to plot, one figure per key.
        x_key: Key of the series used as x values.
        groups: NumPy array with one label per element of ``stats_list``
            (string labels are required when ``include_val`` is set).
        result_dir: Directory the PDF files are written to.
        include_val: Add validation series, use the ``'supervised'`` label
            mapping and fix the y-limits for return/accuracy keys. Otherwise
            the ``'reinforcement'`` label mapping is used.
    """
    n_keys = len(keys_to_plot)
    n_chars = len(str(n_keys))
    progress = ' {:' + str(n_chars) + 'd}/{:' + str(n_chars) + 'd} monitored keys plotted'
    groups_org = np.asarray(groups)
    map_labels = 'supervised' if include_val else 'reinforcement'

    for i_key, k in enumerate(keys_to_plot):
        # Keep the position of every run in stats_list next to its series so
        # that `groups` and the validation lookup stay aligned even when some
        # runs do not contain this key.
        candidates = [(i, s[k].tolist()) for i, s in enumerate(stats_list) if k in s]
        longest = length_of_longest([series for _, series in candidates])
        kept = [(i, series) for i, series in candidates if len(series) == longest]
        indices = [i for i, _ in kept]
        list_of_series = [series for _, series in kept]
        list_of_xs = [stats_list[i][x_key].tolist() for i in indices]
        labels = groups_org[indices].tolist()

        # Validation series: one extra entry per kept run that has the key.
        if include_val:
            val_k = k[:-4] + '_val'
            xs_val, series_val, labels_val = [], [], []
            for i, xs, label in zip(indices, list_of_xs, labels):
                run = stats_list[i]
                if val_k not in run:
                    continue
                values = np.array(run[val_k].tolist())
                observed = ~np.isnan(values)
                xs_val.append(np.array(xs)[observed].tolist())
                series_val.append(values[observed].tolist())
                labels_val.append(label + ', validation')
            list_of_xs.extend(xs_val)
            list_of_series.extend(series_val)
            labels.extend(labels_val)

        # Sort by group label with ONE stable permutation applied to all three
        # lists; sorting (label, payload) tuples separately would break ties
        # on the payload and could pair xs with the wrong series.
        order = sorted(range(len(labels)), key=labels.__getitem__)
        list_of_xs = [list_of_xs[j] for j in order]
        list_of_series = [list_of_series[j] for j in order]
        sorted_groups = np.array([labels[j] for j in order])

        # Plot
        plot.timeseries_mean_grouped(list_of_xs, list_of_series, sorted_groups,
                                     xlabel=x_key, ylabel=k, map_labels=map_labels)
        if include_val:
            if 'return' in k:
                plt.gca().set_ylim(0, 1.5)
            elif 'accuracy' in k:
                plt.gca().set_ylim(0.4, 1)
        if x_key == 'generations':
            file_name = k + '-all-series-mean-sd' + '.pdf'
        else:
            file_name = x_key + '-' + k + '-all-series-mean-sd' + '.pdf'
        plt.savefig(os.path.join(result_dir, file_name), bbox_inches='tight')
        plt.close()

        # Progress: overwrite the same terminal line until the last key.
        is_last = i_key + 1 == n_keys
        print(progress.format(i_key + 1, n_keys), end='\n' if is_last else '\r')
def create_plots(stats_list, keys_to_plot, groups, result_dir, include_val=True):
    """Plot the grouped mean of each monitored key against the sample index.

    For every key only the runs whose series has the longest length are kept
    (the finished runs, or the single longest one). With ``include_val`` set,
    the validation series of each kept run (the key with its last four
    characters replaced by ``'_val'``) is added under the label
    ``'<group>, Validation'``; its x values are the positions of its non-NaN
    entries. One PDF per key, named after the key and the first sorted group
    label, is written to ``result_dir`` and a progress line is printed.

    Args:
        stats_list: Per-run statistics; each element must support
            ``key in s`` and ``s[key].tolist()``.
        keys_to_plot: Keys to plot, one figure per key.
        groups: NumPy array with one string label per element of
            ``stats_list``.
        result_dir: Directory the PDF files are written to.
        include_val: Whether to add the validation series.
    """
    n_keys = len(keys_to_plot)
    n_chars = len(str(n_keys))
    progress = ' {:' + str(n_chars) + 'd}/{:' + str(n_chars) + 'd} monitored keys plotted'
    groups_org = np.asarray(groups)

    for i_key, k in enumerate(keys_to_plot):
        # Keep the position of every run in stats_list next to its series so
        # that `groups` and the validation lookup stay aligned even when some
        # runs do not contain this key.
        candidates = [(i, s[k].tolist()) for i, s in enumerate(stats_list) if k in s]
        longest = length_of_longest([series for _, series in candidates])
        kept = [(i, series) for i, series in candidates if len(series) == longest]
        indices = [i for i, _ in kept]
        list_of_series = [series for _, series in kept]
        # x positions are the sample indices of each kept series.
        list_of_genera = [range(len(series)) for series in list_of_series]
        labels = groups_org[indices].tolist()

        # Validation series: one extra entry per kept run that has the key.
        if include_val:
            val_k = k[:-4] + '_val'
            genera_val, series_val, labels_val = [], [], []
            for i, label in zip(indices, labels):
                run = stats_list[i]
                if val_k not in run:
                    continue
                values = np.array(run[val_k].tolist())
                observed = ~np.isnan(values)
                genera_val.append(np.where(observed)[0].tolist())
                series_val.append(values[observed].tolist())
                labels_val.append(label + ', Validation')
            list_of_genera.extend(genera_val)
            list_of_series.extend(series_val)
            labels.extend(labels_val)

        # Sort by group label with ONE stable permutation applied to all three
        # lists. Sorting (label, payload) tuples would compare `range` objects
        # on tied labels (TypeError) and could pair xs with the wrong series.
        order = sorted(range(len(labels)), key=labels.__getitem__)
        list_of_genera = [list_of_genera[j] for j in order]
        list_of_series = [list_of_series[j] for j in order]
        sorted_groups = np.array([labels[j] for j in order])

        # Plot
        plot.timeseries_mean_grouped(list_of_genera, list_of_series, sorted_groups,
                                     xlabel='generations', ylabel=k,
                                     map_labels='reinforcement')
        # TODO: set ylim for loglikelihood, leave without lims for RL
        plt.savefig(os.path.join(result_dir, k + '_' + sorted_groups[0] + '_baseline' + '.pdf'),
                    bbox_inches='tight')
        plt.close()

        # Progress: overwrite the same terminal line until the last key.
        is_last = i_key + 1 == n_keys
        print(progress.format(i_key + 1, n_keys), end='\n' if is_last else '\r')
def _save_timeseries_overview(list_of_genera, list_of_series, key, out_dir, tag):
    """Save the series, distribution, median and final-distribution plots.

    Files are named ``'<key>-<tag>-<kind>.pdf'`` inside ``out_dir``.
    """
    plot.timeseries(list_of_genera, list_of_series, xlabel='generations', ylabel=key)
    plt.savefig(os.path.join(out_dir, key + '-' + tag + '-series.pdf'), bbox_inches='tight')
    plt.close()

    plot.timeseries_distribution(list_of_genera, list_of_series,
                                 xlabel='generations', ylabel=key)
    plt.savefig(os.path.join(out_dir, key + '-' + tag + '-distribution.pdf'),
                bbox_inches='tight')
    plt.close()

    plot.timeseries_median(list_of_genera, list_of_series, xlabel='generations', ylabel=key)
    plt.savefig(os.path.join(out_dir, key + '-' + tag + '-median.pdf'), bbox_inches='tight')
    plt.close()

    plot.timeseries_final_distribution(list_of_series, label=key,
                                       ybins=len(list_of_series) * 10)
    plt.savefig(os.path.join(out_dir, key + '-' + tag + '-final-distribution.pdf'),
                bbox_inches='tight')
    plt.close()


def create_plots(args, stats_list, keys_to_monitor, groups):
    """Save overview plots of every monitored key to ``args.monitor_dir``.

    For each key this writes the series, distribution, median and
    final-distribution plots over all runs, a grouped mean plot over the runs
    with the longest series, and, when there is more than one distinct group,
    the same four overview plots per group. A progress line is printed after
    each key.

    Args:
        args: Object with a ``monitor_dir`` attribute (output directory).
        stats_list: Per-run statistics; each element must support
            ``key in s`` and ``s[key].tolist()``.
        keys_to_monitor: Keys to plot.
        groups: NumPy array with one integer group id per element of
            ``stats_list`` (ids are formatted with ``'{0:02d}'``).
    """
    unique_groups = set(groups)
    n_keys = len(keys_to_monitor)
    n_chars = len(str(n_keys))
    progress = ' {:' + str(n_chars) + 'd}/{:' + str(n_chars) + 'd} monitored keys plotted'

    for i_key, k in enumerate(keys_to_monitor):
        # Remember each run's position in stats_list so `groups` can be
        # indexed correctly even when some runs do not contain this key.
        candidates = [(i, s[k].tolist()) for i, s in enumerate(stats_list) if k in s]
        list_of_series = [series for _, series in candidates]
        list_of_genera = [range(len(series)) for series in list_of_series]
        _save_timeseries_overview(list_of_genera, list_of_series, k, args.monitor_dir, 'all')

        # Subset only those series that are done (or the one that is the longest)
        longest = length_of_longest(list_of_series)
        kept = [(i, series) for i, series in candidates if len(series) == longest]
        indices = [i for i, _ in kept]
        list_of_longest_series = [series for _, series in kept]
        list_of_longest_genera = [range(len(series)) for series in list_of_longest_series]
        plot.timeseries_mean_grouped(list_of_longest_genera, list_of_longest_series,
                                     groups[indices], xlabel='generations', ylabel=k)
        plt.savefig(os.path.join(args.monitor_dir, k + '-all-series-mean-sd' + '.pdf'),
                    bbox_inches='tight')
        plt.close()

        # Per-group overview, only meaningful with more than one group.
        if len(unique_groups) > 1:
            for g in unique_groups:
                gstr = '{0:02d}'.format(g)
                group_stats = [stats_list[i] for i in np.where(groups == g)[0]]
                group_series = [s[k].tolist() for s in group_stats if k in s]
                if not group_series:
                    continue
                group_genera = [range(len(series)) for series in group_series]
                _save_timeseries_overview(group_genera, group_series, k,
                                          args.monitor_dir, 'group-' + gstr)

        # Progress: overwrite the same terminal line until the last key.
        is_last = i_key + 1 == n_keys
        print(progress.format(i_key + 1, n_keys), end='\n' if is_last else '\r')
print("Loaded " + str(i) + "/" + str(len(directories)) + ": " + d) except: print("No files found in: " + d) invert_signs(algorithm_states, keys='all') groups = [0] * len(algorithm_states) for i, s in enumerate(algorithm_states): if s['safe_mutation'] == 'SUM': groups[i] = 1 groups = np.array(groups) for k in ['return_unp', 'return_avg', 'return_min', 'return_max']: list_of_series = [s['stats'][k] for s in algorithm_states] list_of_genera = [s['stats']['generations'] for s in algorithm_states] l = length_of_longest(list_of_series) indices = [ i for i, series in enumerate(list_of_series) if len(series) == l ] list_of_series = [list_of_series[i] for i in indices] list_of_genera = [list_of_genera[i] for i in indices] groups_longest_series = groups[indices] plot.timeseries_median_grouped(list_of_genera, list_of_series, groups_longest_series, xlabel='generations', ylabel=k) plt.savefig(os.path.join(save_dir, 'analysis-01-' + k + '.pdf'), bbox_inches='tight')
def violinplots(stats, keys_to_plot, groups, result_dir):
    """Save one grouped violin plot of the final values of several keys.

    For every key only the runs with the longest series are used; the final
    value of a run is its last non-NaN entry. All keys are drawn in a single
    figure with the group on the x axis and one hue per key, saved as
    ``'<last key minus 4 chars>-final-distribution-boxplot-grouped.pdf'`` in
    ``result_dir``.

    Args:
        stats: Per-run statistics; each element must support ``key in s``
            and ``s[key].tolist()``.
        keys_to_plot: Non-empty sequence of keys; the first key (minus its
            last four characters) selects the y-axis label.
        groups: NumPy array with one label per element of ``stats``.
        result_dir: Directory the PDF file is written to.

    Raises:
        ValueError: If no final values could be collected for any key.
    """
    groups_org = np.asarray(groups)
    prefix = keys_to_plot[0][:-4]
    # Fall back to the raw prefix so an unrecognised key no longer leaves
    # `ylabel` unbound.
    ylabel = {'return': 'NLL', 'accuracy': 'Classification accuracy'}.get(prefix, prefix)

    frames = []
    for k in keys_to_plot:
        # Keep each run's position in `stats` so the group lookup stays
        # aligned even when some runs do not contain this key.
        candidates = [(i, s[k].tolist()) for i, s in enumerate(stats) if k in s]
        longest = length_of_longest([series for _, series in candidates])
        kept = [(i, series) for i, series in candidates if len(series) == longest]
        run_groups = groups_org[[i for i, _ in kept]]
        finals = []
        for _, series in kept:
            values = np.array(series)
            finals.append(values[~np.isnan(values)][-1])

        if k[-4:] == '_val':
            label = 'Validation (unperturbed)'
        else:
            # Only the first word of the label (disregard accuracy, NLL etc.)
            label = lookup_label(k, mode='supervised').split(' ')[0]

        for g in np.unique(run_groups):
            members = np.where(run_groups == g)[0]
            frames.append(pd.DataFrame({
                'final_val': [finals[j] for j in members],
                'group': g,
                'label': label,
            }))

    if not frames:
        raise ValueError('No final values found for keys: {}'.format(list(keys_to_plot)))
    df = pd.concat(frames, axis=0, ignore_index=True)

    fig, ax = plt.subplots()
    fig.set_size_inches(*plt.rcParams.get('figure.figsize'))
    # Axes-level call: draws directly on `ax` and does not create a second,
    # never-closed figure the way the figure-level factorplot/catplot does.
    # NOTE(review): the previous code called despine() on the grid returned by
    # factorplot, which presumably did not affect this figure; add
    # sns.despine(ax=ax, left=True) here if the left spine should be removed.
    sns.violinplot(x="group", y="final_val", hue="label", data=df, ax=ax)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=10)
    ax.set_xlabel('')
    ax.set_ylabel(ylabel)
    # Shrink the axes vertically to make room for the legend above the plot.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height * 0.9])
    ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1), ncol=3)
    fig.savefig(os.path.join(
        result_dir,
        keys_to_plot[-1][:-4] + '-final-distribution-boxplot-grouped' + '.pdf'),
        bbox_inches='tight')
    plt.close(fig)
def _plot_final_cdf_and_boxplot(finals, run_groups, key, result_dir, color_start, show_legend):
    """Save the per-group CDF histogram and boxplot of ``finals`` for ``key``.

    Writes ``'<key>-final-distribution.pdf'`` and
    ``'<key>-final-distribution-boxplot.pdf'`` to ``result_dir``.
    """
    xlabel = lookup_label(key, mode='supervised')
    unique_groups = np.unique(run_groups)
    colors = plt.cm.gnuplot(np.linspace(color_start, 1, len(unique_groups)))

    # Cumulative step histogram, one curve per group.
    fig, ax = plt.subplots()
    ax.set_xlabel(xlabel)
    ax.set_ylabel('CDF')
    columns = {}
    for g, c in zip(unique_groups, colors):
        members = np.where(run_groups == g)[0]
        values = [finals[j] for j in members]
        ax.hist(values, alpha=0.6, density=True, histtype='step',
                cumulative=True, linewidth=2, color=c)
        # Series of different lengths are padded with NaN in the DataFrame.
        columns[g] = pd.Series(values)
    if show_legend:
        # 'northwest' is not a matplotlib location string; 'upper left' is.
        ax.legend(list(unique_groups), loc='upper left')
    fig.savefig(os.path.join(result_dir, key + '-final-distribution' + '.pdf'),
                bbox_inches='tight')
    plt.close(fig)

    # Boxplot, one box per group.
    df = pd.DataFrame(columns)
    fig, ax = plt.subplots()
    my_order = [
        r'Isotropic (fixed $\sigma$)', r'Isotropic', r'Separable (layer)',
        r'Separable (parameter)'
    ]
    positions = [my_order.index(e) for e in df.keys() if e in my_order]
    if len(positions) != len(df.columns):
        # Some group is not part of the predefined order: a partial position
        # list would not match the number of boxes, so use the default layout.
        positions = None
    df.boxplot(ax=ax, rot=10, positions=positions, showfliers=True)
    ax.xaxis.grid(False)
    ax.set_xlabel('')
    ax.set_ylabel(xlabel)
    fig.savefig(os.path.join(result_dir, key + '-final-distribution-boxplot' + '.pdf'),
                bbox_inches='tight')
    plt.close(fig)


def final_distribution(stats, keys_to_plot, groups, result_dir, include_val=True):
    """Save CDF and boxplot figures of the final value of each key per group.

    For every key only the runs with the longest series are used and the
    final value is the last entry of the series. When ``include_val`` is set,
    the key ends in ``'_unp'`` and every run contains the matching ``'_val'``
    key, the same two figures are also saved for the validation key, using
    the last non-NaN entry as the final value.

    Args:
        stats: Per-run statistics; each element must support ``key in s``
            and ``s[key].tolist()``.
        keys_to_plot: Keys to plot.
        groups: NumPy array with one label per element of ``stats``.
        result_dir: Directory the PDF files are written to.
        include_val: Whether to also plot the validation counterpart.
    """
    groups_org = np.asarray(groups)

    for k in keys_to_plot:
        k_val = k[:-4] + '_val'
        plot_val = include_val and all(
            k[-4:] == '_unp' and k_val in s for s in stats)

        # Keep each run's position in `stats` so the group lookup stays
        # aligned even when some runs do not contain this key.
        candidates = [(i, s[k].tolist()) for i, s in enumerate(stats) if k in s]
        longest = length_of_longest([series for _, series in candidates])
        kept = [(i, series) for i, series in candidates if len(series) == longest]
        finals = [series[-1] for _, series in kept]
        run_groups = groups_org[[i for i, _ in kept]]
        _plot_final_cdf_and_boxplot(finals, run_groups, k, result_dir,
                                    color_start=0, show_legend=True)

        if plot_val:
            candidates = [(i, s[k_val].tolist()) for i, s in enumerate(stats) if k_val in s]
            longest = length_of_longest([series for _, series in candidates])
            kept = [(i, series) for i, series in candidates if len(series) == longest]
            finals = []
            for _, series in kept:
                values = np.array(series)
                # Validation series contain NaN where nothing was evaluated.
                finals.append(values[~np.isnan(values)][-1])
            run_groups = groups_org[[i for i, _ in kept]]
            _plot_final_cdf_and_boxplot(finals, run_groups, k_val, result_dir,
                                        color_start=0.1, show_legend=False)