def timeseries(xdatas, ydatas, xlabel, ylabel, plotlabels=None, figsize=(6.4, 4.8), map_labels=False): if map_labels: xlabel = lookup_label(xlabel, mode=map_labels) plotlabels = [ lookup_label(plotlabel, mode=map_labels) for plotlabel in plotlabels ] if plotlabels is None: plotlabels = [None] * len(xdatas) fig = plt.figure(figsize=figsize) handles = [] maxs = [] for xdata, ydata, plotlabel in zip(xdatas, ydatas, plotlabels): ydata = moving_average(ydata) maxs.append(np.max(ydata)) handles.extend(plt.plot(xdata, ydata, label=plotlabel)) if plotlabels is not None: plt.legend(handles=handles, ncol=2, loc='best') # plt.gca().set_ylim([None, np.mean(maxs)*1.2]) plt.xlabel(xlabel) plt.ylabel(ylabel)
def timeseries_median(xdatas, ydatas, xlabel, ylabel, figsize=(6.4, 4.8), map_labels=False): if map_labels: xlabel = lookup_label(xlabel, mode=map_labels) ylabel = lookup_label(ylabel, mode=map_labels) length = len(sorted(ydatas, key=len, reverse=True)[0]) ydata = [] for i in range(len(ydatas)): ydata.append(ydatas[i] + [np.NaN] * (length - len(ydatas[i]))) ydata[i] = np.array(ydata[i]) yavgs = np.nanmedian(ydata, 0) ymaxs = np.nanmax(ydata, 0) ymins = np.nanmin(ydata, 0) xdata = get_longest_sublists(xdatas)[0] h = [] fig = plt.figure() h.extend(plt.plot(xdata, moving_average(yavgs), label='MA of median')) h.extend(plt.plot(xdata, moving_average(ymaxs), label='MA of max')) h.extend(plt.plot(xdata, moving_average(ymins), label='MA of min')) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.legend(handles=h, loc='best')
def timeseries_mean_grouped(xdatas, ydatas, groups, xlabel, ylabel, figsize=(6.4, 4.8), points_in_plot=200, map_labels=False): assert type(groups) == np.ndarray if map_labels: xlabel = lookup_label(xlabel, mode=map_labels) ylabel = lookup_label(ylabel, mode=map_labels) sns.set(color_codes=True) plt.figure(figsize=figsize) legend = [] n_groups = len(np.unique(groups)) # if n_groups <= 6: # colors = plt.cm.gnuplot(np.linspace(0, 1, n_groups)) # #colors = np.reshape(np.append(colors[0::2], colors[1::2]), (6, 4)) # else: colors = plt.cm.tab20(np.linspace(0, 1, n_groups)) sns.set_style("ticks") for g, c in zip(np.unique(groups), colors[0:n_groups]): if type(g) in [str, np.str, np.str_]: gstr = g else: gstr = 'G{0:02d}'.format(g) legend.append(gstr) g_indices = np.where(groups == g)[0] ydatas_grouped = [ydatas[i] for i in g_indices] length = len(sorted(ydatas_grouped, key=len, reverse=True)[0]) nsub = int(length / points_in_plot) if points_in_plot < length else 1 ydata = np.array([ ydata + [np.NaN] * (length - len(ydata)) for ydata in ydatas_grouped ]) # Subsample y ydata_subsampled = ydata[:, ::nsub] if np.prod( ydata.shape) > nsub else ydata # if ydata_subsampled[-1] != ydata[-1]: # ydata_subsampled = np.append(ydata_subsampled, ydata[0,-1]) x = [xdatas[i] for i in g_indices] x = get_longest_sublists(x)[0] if type(x) in [range, list]: x = np.array(x) # Subsample x x_subsampled = x[::nsub] if len(x) > nsub else x # if x_subsampled[-1] != x[-1] # x_subsampled = np.append(x_subsampled, x[-1]) ax = sns.tsplot(value=ylabel, data=ydata_subsampled, time=x_subsampled, ci="sd", estimator=np.mean, color=c) lines = list( filter(lambda c: type(c) == mpl.lines.Line2D, ax.get_children())) plt.legend(handles=lines, labels=legend) plt.xlabel(xlabel)
def timeseries_final_distribution(datas, label, ybins='auto', figsize=(6.4, 4.8), map_labels=False): if map_labels: label = lookup_label(label, mode=map_labels) datas_final = [ydata[-1] for ydata in datas] fig, ax = plt.subplots(figsize=figsize) ax.hist(datas_final, bins=ybins) plt.xlabel(label) plt.ylabel('Counts')
def timeseries_distribution(xdatas, ydatas, xlabel, ylabel, xbins=100, ybins=100, figsize=(6.4, 4.8), map_labels=False): if map_labels: xlabel = lookup_label(xlabel, mode=map_labels) ylabel = lookup_label(ylabel, mode=map_labels) plt.rcParams['image.cmap'] = 'viridis' # Get x and y edges spanning all values maxx = max([max(xdata) for xdata in xdatas]) minx = min([min(xdata) for xdata in xdatas]) maxy = max([max(ydata) for ydata in ydatas]) miny = min([min(ydata) for ydata in ydatas]) xedges = np.linspace(minx, maxx, num=xbins) yedges = np.linspace(miny, maxy, num=ybins) # Use number of bins instead if only single unique value in data if maxy == miny: yedges = ybins if maxx == minx: xedges = xbins H, xedges, yedges = np.histogram2d(xdatas[0], ydatas[0], bins=(xedges, yedges)) counts = np.zeros(H.shape) counts += H for xdata, ydata in zip(xdatas[1:], ydatas[1:]): H, xedges, yedges = np.histogram2d(xdata, ydata, bins=(xedges, yedges)) counts += H counts = counts / counts.max() X, Y = np.meshgrid(xedges, yedges) fig, ax = plt.subplots(figsize=figsize) plt.pcolormesh(X, Y, counts.T, linewidth=0, rasterized=True) plt.colorbar() plt.xlabel(xlabel) plt.ylabel(ylabel)
def violinplots(stats, keys_to_plot, groups, result_dir): groups_org = groups.copy() if keys_to_plot[0][:-4] == 'return': ylabel = 'NLL' elif keys_to_plot[0][:-4] == 'accuracy': ylabel = 'Classification accuracy' df = pd.DataFrame([]) for k in keys_to_plot: # Get data and subset only those series that are done (or the one that is the longest) # list_of_series = [s[k].tolist() for s in stats if k in s] # l = length_of_longest(list_of_series) # indices = [i for i, series in enumerate(list_of_series) if len(series) == l] # groups = groups_org[indices] # list_of_final = [list_of_series[i][-1] for i in indices] list_of_series = [s[k].tolist() for s in stats if k in s] l = length_of_longest(list_of_series) indices = [ i for i, series in enumerate(list_of_series) if len(series) == l ] groups = groups_org[indices] list_of_final = [] for i in indices: a = np.array(list_of_series[i]) list_of_final.append(a[~np.isnan(a)][-1]) # n_groups = len(np.unique(groups)) colors = plt.cm.gnuplot(np.linspace(0, 1, n_groups)) for g, c in zip(np.unique(groups), colors[0:n_groups]): g_indices = np.where(groups == g)[0] list_of_final_group = [list_of_final[i] for i in g_indices] label = ' '.join( lookup_label(k, mode='supervised').split(' ')[:1] ) # Only two first label words (disregard accucracy, NLL etc.) if k[-4:] == '_val': label = 'Validation (unperturbed)' df_new = pd.DataFrame({ 'final_val': list_of_final_group, 'group': g, 'label': label }) df = pd.concat([df, df_new], axis=0, ignore_index=True) # my_order = [r'Isotropic (fixed $\sigma$)', r'Isotropic', r'Separable (layer)', r'Separable (parameter)'] # positions = [my_order.index(e) for e in df.keys() if e in my_order] fig, ax = plt.subplots() fig.set_size_inches(*plt.rcParams.get('figure.figsize')) g = sns.factorplot(ax=ax, x="group", y="final_val", hue="label", data=df, kind="violin", legend=False) g.despine(left=True) # g.set_xticklabels(rotation=10) ax.set_xticklabels(ax.get_xticklabels(), rotation=10) ax.set_xlabel('') ax.set_ylabel(ylabel) box = ax.get_position() ax.set_position([box.x0, box.y0, box.width, box.height * 0.9]) ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1), ncol=3) fig.savefig(os.path.join( result_dir, k[:-4] + '-final-distribution-boxplot-grouped' + '.pdf'), bbox_inches='tight') plt.close(fig)
def final_distribution(stats, keys_to_plot, groups, result_dir, include_val=True): groups_org = groups.copy() include_val_setting = include_val for i_key, k in enumerate(keys_to_plot): include_val = include_val_setting if include_val: for s in stats: if k[-4:] != '_unp' or k[:-4] + '_val' not in s: include_val = False # Get data and subset only those series that are done (or the one that is the longest) list_of_series = [s[k].tolist() for s in stats if k in s] l = length_of_longest(list_of_series) indices = [ i for i, series in enumerate(list_of_series) if len(series) == l ] groups = groups_org[indices] list_of_final = [list_of_series[i][-1] for i in indices] # fig, ax = plt.subplots() xlabel = lookup_label(k, mode='supervised') ax.set_xlabel(xlabel) ax.set_ylabel('CDF') legend = [] n_groups = len(np.unique(groups)) df = pd.DataFrame([]) colors = plt.cm.gnuplot(np.linspace(0, 1, n_groups)) for g, c in zip(np.unique(groups), colors[0:n_groups]): g_indices = np.where(groups == g)[0] list_of_final_group = [list_of_final[i] for i in g_indices] ax.hist(list_of_final_group, alpha=0.6, density=True, histtype='step', cumulative=True, linewidth=2, color=c) legend.append(g) df_new = pd.DataFrame({g: list_of_final_group}) df = pd.concat([df, df_new], axis=1) ax.legend(legend, loc='northwest') fig.savefig(os.path.join(result_dir, k + '-final-distribution' + '.pdf'), bbox_inches='tight') plt.close(fig) fig, ax = plt.subplots() my_order = [ r'Isotropic (fixed $\sigma$)', r'Isotropic', r'Separable (layer)', r'Separable (parameter)' ] positions = [my_order.index(e) for e in df.keys() if e in my_order] df.boxplot(rot=10, positions=positions, showfliers=True) ax.xaxis.grid(False) ax.set_xlabel('') ax.set_ylabel(xlabel) # ax.set_ylim(auto=True) fig.savefig(os.path.join(result_dir, k + '-final-distribution-boxplot' + '.pdf'), bbox_inches='tight') plt.close(fig) # if include_val: k_val = k[:-4] + '_val' list_of_series = [s[k_val].tolist() for s in stats if k_val in s] l = length_of_longest(list_of_series) indices = [ i for i, series in enumerate(list_of_series) if len(series) == l ] groups = groups_org[indices] list_of_final = [] for i in indices: a = np.array(list_of_series[i]) list_of_final.append(a[~np.isnan(a)][-1]) # fig, ax = plt.subplots() xlabel = lookup_label(k_val, mode='supervised') ax.set_xlabel(xlabel) ax.set_ylabel('CDF') legend = [] n_groups = len(np.unique(groups)) df = pd.DataFrame([]) colors = plt.cm.gnuplot(np.linspace(0.1, 1, n_groups)) for g, c in zip(np.unique(groups), colors[0:n_groups]): g_indices = np.where(groups == g)[0] list_of_final_group = [list_of_final[i] for i in g_indices] ax.hist(list_of_final_group, alpha=0.6, density=True, histtype='step', cumulative=True, linewidth=2, color=c) legend.append(g) df_new = pd.DataFrame({g: list_of_final_group}) df = pd.concat([df, df_new], axis=1) fig.savefig(os.path.join(result_dir, k_val + '-final-distribution' + '.pdf'), bbox_inches='tight') plt.close(fig) fig, ax = plt.subplots() my_order = [ r'Isotropic (fixed $\sigma$)', r'Isotropic', r'Separable (layer)', r'Separable (parameter)' ] positions = [my_order.index(e) for e in df.keys() if e in my_order] df.boxplot(rot=10, positions=positions, showfliers=True) ax.xaxis.grid(False) ax.set_xlabel('') ax.set_ylabel(xlabel) fig.savefig(os.path.join( result_dir, k_val + '-final-distribution-boxplot' + '.pdf'), bbox_inches='tight') plt.close(fig)
def plot_stats(stats_file, chkpt_dir, wide_figure=True, map_labels=False): """ Plots training statistics - Unperturbed return - Average return - Maximum return - Minimum return - Smoothed version of the above - Return variance - Rank of unperturbed model - Sigma - Learning rate - Total wall clock time - Wall clock time per generation Possible x-axes are: - Generations - Episodes - Observations - Walltimes """ # Plot settings plt.rc('font', family='sans-serif') plt.rc('xtick', labelsize='x-small') plt.rc('ytick', labelsize='x-small') if wide_figure: figsize = mpl.figure.figaspect(9 / 16) else: figsize = (6.4, 4.8) # Load data try: stats = load_stats(stats_file) except: return # Invert sign on negative returns (negative returns indicate a converted minimization problem) if (np.array(stats['return_max']) < 0).all(): for k in [ 'return_unp', 'return_avg', 'return_min', 'return_max', 'return_val' ]: stats[k] = [-s for s in stats[k]] # Computations/Transformations psuedo_start_time = stats['walltimes'].diff().mean() # Add pseudo start time to all times abs_walltimes = stats['walltimes'] + psuedo_start_time # Append pseudo start time to top of series and compute differences stats['time_per_generation'] = pd.concat( [pd.Series(psuedo_start_time), abs_walltimes]).diff().dropna() stats['parallel_fraction'] = stats['workertimes'] / stats[ 'time_per_generation'] # Compute moving averages for c in stats.columns: if not 'Unnamed' in c and c[-3:] != '_ma': stats[c + '_ma'] = stats[c].rolling(window=10, min_periods=1, center=True, win_type=None).mean() # Plot each of the columns including moving average c_list = stats.columns.tolist() while c_list: c = c_list.pop() is_unnamed = lambda c: 'Unnamed' in c is_part_of_multi_series = lambda c: c.split('_')[-1].isdigit() is_moving_average = lambda c: c[-3:] == '_ma' if not is_unnamed(c) and not is_moving_average(c): if is_part_of_multi_series(c): # Find all c that are in this series cis = { ci for ci in stats.columns if ci.split('_')[:-1] == c.split('_')[:-1] and not is_moving_average(c) } nlines = len(cis) for ci in cis.difference({c}): c_list.remove(ci) cis = sorted(list(cis)) c = ''.join(c.split('_')[:-1]) # Loop over them and plot into same plot fig, ax = plt.subplots(figsize=figsize) ax.set_prop_cycle('color', plt.cm.tab20(np.linspace(0, 1, nlines))) for ci in cis: stats[ci].plot(ax=ax, linestyle='None', marker='.', alpha=0.06, label='_nolegend_') #ax.set_prop_cycle(None) for ci in cis: stats[ci + '_ma'].plot(ax=ax, linestyle='-', label=ci) box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.8, box.height]) ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) else: fig, ax = plt.subplots(figsize=figsize) stats[c].astype(float) stats[c].plot(ax=ax, alpha=0.3, linestyle='None', marker='.', label='_nolegend_') ax.set_prop_cycle(None) stats[c + '_ma'].plot(ax=ax, linestyle='-', label='_nolegend_') # ax.legend(loc='best') plt.xlabel('Iteration') if map_labels: c = lookup_label(c, mode=map_labels) plt.ylabel(c) fig.savefig(os.path.join(chkpt_dir, c + '.pdf'), bbox_inches='tight') plt.close(fig)