예제 #1
0
def create_plots(stats_list, keys_to_plot, x_key, groups, result_dir, include_val=True):
    """Plot the grouped mean of every monitored key against ``x_key``.

    For each key, only the runs whose series has the maximal length (as
    reported by ``length_of_longest``) are plotted. When ``include_val`` is
    set, the matching ``<key[:-4]>_val`` series of those runs is added as a
    separate ``"<group>, validation"`` group with its NaN entries removed.
    One PDF per key is written to ``result_dir``.

    Args:
        stats_list: Per-run statistics; each element must support
            ``key in s`` and ``s[key].tolist()``.
        keys_to_plot: Keys to plot. The last four characters of each key are
            replaced by ``'_val'`` to find the validation series.
        x_key: Key of the series used as x-axis values.
        groups: NumPy array of string group labels, one per element of
            ``stats_list`` (assumed to be parallel to ``stats_list``).
        result_dir: Directory the PDF files are written to.
        include_val: Whether to add validation series; this also selects the
            ``'supervised'`` label mapping and fixed y-limits.
    """
    n_keys = len(keys_to_plot)
    n_chars = len(str(n_keys))
    progress = '    {{:{0}d}}/{{:{0}d}} monitored keys plotted'.format(n_chars)
    map_labels = 'supervised' if include_val else 'reinforcement'

    for i_key, k in enumerate(keys_to_plot):
        # Track positions in stats_list (not in the filtered list) so that
        # `groups` and the validation lookup stay aligned even when some runs
        # do not contain this key.
        present = [i for i, s in enumerate(stats_list) if k in s]
        series_by_run = {i: stats_list[i][k].tolist() for i in present}
        longest = length_of_longest([series_by_run[i] for i in present])
        kept = [i for i in present if len(series_by_run[i]) == longest]

        run_groups = groups[kept]  # fancy indexing returns a fresh array
        list_of_series = [series_by_run[i] for i in kept]
        list_of_xs = [stats_list[i][x_key].tolist() for i in kept]

        # Validation series: built per run so x-values, y-values and group
        # label always belong to the same run.
        if include_val:
            val_k = k[:-4] + '_val'
            val_xs, val_series, val_groups = [], [], []
            for i, xs, g in zip(kept, list_of_xs, run_groups):
                if val_k not in stats_list[i]:
                    continue
                values = np.array(stats_list[i][val_k].tolist())
                observed = ~np.isnan(values)
                val_xs.append(np.array(xs)[observed].tolist())
                val_series.append(values[observed].tolist())
                val_groups.append(g + ', validation')
            if val_series:
                list_of_xs.extend(val_xs)
                list_of_series.extend(val_series)
                run_groups = np.append(run_groups, np.array(val_groups))

        # Sort everything with ONE stable permutation keyed on the group label
        # only, so xs and series cannot be reordered differently on ties.
        order = np.argsort(run_groups, kind='stable')
        run_groups = run_groups[order]
        list_of_xs = [list_of_xs[i] for i in order]
        list_of_series = [list_of_series[i] for i in order]

        # Plot
        plot.timeseries_mean_grouped(list_of_xs, list_of_series, run_groups,
                                     xlabel=x_key, ylabel=k, map_labels=map_labels)
        if include_val:
            if 'return' in k:
                plt.gca().set_ylim(0, 1.5)
            elif 'accuracy' in k:
                plt.gca().set_ylim(0.4, 1)
        if x_key == 'generations':
            file_name = k + '-all-series-mean-sd' + '.pdf'
        else:
            file_name = x_key + '-' + k + '-all-series-mean-sd' + '.pdf'
        plt.savefig(os.path.join(result_dir, file_name), bbox_inches='tight')
        plt.close()

        # Progress: overwrite the same console line until the last key.
        is_last = i_key + 1 == n_keys
        print(progress.format(i_key + 1, n_keys), end='\n' if is_last else '\r')
예제 #2
0
def create_plots(stats_list,
                 keys_to_plot,
                 groups,
                 result_dir,
                 include_val=True):
    """Plot the grouped mean of every monitored key over generations.

    For each key, only the runs whose series has the maximal length (as
    reported by ``length_of_longest``) are plotted. When ``include_val`` is
    set, the matching ``<key[:-4]>_val`` series of those runs is added as a
    separate ``"<group>, Validation"`` group, keeping only the non-NaN
    entries and their positions. One PDF per key is written to
    ``result_dir``.

    Args:
        stats_list: Per-run statistics; each element must support ``len``,
            ``key in s`` and ``s[key].tolist()``.
        keys_to_plot: Keys to plot. The last four characters of each key are
            replaced by ``'_val'`` to find the validation series.
        groups: NumPy array of string group labels, one per element of
            ``stats_list`` (assumed to be parallel to ``stats_list``).
        result_dir: Directory the PDF files are written to.
        include_val: Whether to add validation series.
    """
    n_keys = len(keys_to_plot)
    n_chars = len(str(n_keys))
    progress = '    {{:{0}d}}/{{:{0}d}} monitored keys plotted'.format(n_chars)

    for i_key, k in enumerate(keys_to_plot):
        # Track positions in stats_list (not in the filtered list) so that
        # `groups` and the validation lookup stay aligned even when some runs
        # do not contain this key.
        present = [i for i, s in enumerate(stats_list) if k in s]
        series_by_run = {i: stats_list[i][k].tolist() for i in present}
        longest = length_of_longest([series_by_run[i] for i in present])
        kept = [i for i in present if len(series_by_run[i]) == longest]

        run_groups = groups[kept]  # fancy indexing returns a fresh array
        list_of_series = [series_by_run[i] for i in kept]
        list_of_genera = [range(len(stats_list[i])) for i in kept]

        # Validation series: built per run so positions, values and group
        # label always belong to the same run.
        if include_val:
            val_k = k[:-4] + '_val'
            val_genera, val_series, val_groups = [], [], []
            for i, g in zip(kept, run_groups):
                if val_k not in stats_list[i]:
                    continue
                values = np.array(stats_list[i][val_k].tolist())
                observed = ~np.isnan(values)
                val_genera.append(np.where(observed)[0].tolist())
                val_series.append(values[observed].tolist())
                val_groups.append(g + ', Validation')
            if val_series:
                list_of_genera.extend(val_genera)
                list_of_series.extend(val_series)
                run_groups = np.append(run_groups, np.array(val_groups))

        # Sort everything with ONE stable permutation keyed on the group label
        # only. Sorting (group, payload) tuples would compare `range` objects
        # on ties, which raises TypeError, and could reorder x and y
        # differently.
        order = np.argsort(run_groups, kind='stable')
        run_groups = run_groups[order]
        list_of_genera = [list_of_genera[i] for i in order]
        list_of_series = [list_of_series[i] for i in order]

        # Plot
        plot.timeseries_mean_grouped(list_of_genera,
                                     list_of_series,
                                     run_groups,
                                     xlabel='generations',
                                     ylabel=k,
                                     map_labels='reinforcement')
        # TODO: set ylim for loglikelihood, leave without lims for RL
        plt.savefig(os.path.join(
            result_dir, k + '_' + run_groups[0] + '_baseline' + '.pdf'),
                    bbox_inches='tight')
        plt.close()

        # Progress: overwrite the same console line until the last key.
        is_last = i_key + 1 == n_keys
        print(progress.format(i_key + 1, n_keys), end='\n' if is_last else '\r')
예제 #3
0
def _save_series_overview(list_of_genera, list_of_series, key, file_prefix,
                          out_dir):
    """Save the series, distribution, median and final-distribution plots."""
    plot.timeseries(list_of_genera,
                    list_of_series,
                    xlabel='generations',
                    ylabel=key)
    plt.savefig(os.path.join(out_dir, file_prefix + '-series.pdf'),
                bbox_inches='tight')
    plt.close()

    plot.timeseries_distribution(list_of_genera,
                                 list_of_series,
                                 xlabel='generations',
                                 ylabel=key)
    plt.savefig(os.path.join(out_dir, file_prefix + '-distribution.pdf'),
                bbox_inches='tight')
    plt.close()

    plot.timeseries_median(list_of_genera,
                           list_of_series,
                           xlabel='generations',
                           ylabel=key)
    plt.savefig(os.path.join(out_dir, file_prefix + '-median.pdf'),
                bbox_inches='tight')
    plt.close()

    plot.timeseries_final_distribution(list_of_series,
                                       label=key,
                                       ybins=len(list_of_series) * 10)
    plt.savefig(os.path.join(out_dir,
                             file_prefix + '-final-distribution.pdf'),
                bbox_inches='tight')
    plt.close()


def create_plots(args, stats_list, keys_to_monitor, groups):
    """Write monitoring plots for every key, overall and per group.

    For each key this saves, into ``args.monitor_dir``:
    the four overview plots over all runs containing the key
    (``<key>-all-*.pdf``), a grouped mean plot over the runs whose series has
    the maximal length (``<key>-all-series-mean-sd.pdf``), and, when there is
    more than one distinct group, the four overview plots per group
    (``<key>-group-<NN>-*.pdf``).

    Args:
        args: Object exposing ``monitor_dir``, the output directory.
        stats_list: Per-run statistics; each element must support ``len``,
            ``key in s`` and ``s[key].tolist()``.
        keys_to_monitor: Keys to plot.
        groups: NumPy array of integer group ids, one per element of
            ``stats_list``.
    """
    unique_groups = set(groups)
    n_keys = len(keys_to_monitor)
    n_chars = len(str(n_keys))
    progress = '    {{:{0}d}}/{{:{0}d}} monitored keys plotted'.format(n_chars)

    for i_key, k in enumerate(keys_to_monitor):
        # Positions in stats_list of the runs that actually contain this key.
        present = [i for i, s in enumerate(stats_list) if k in s]
        list_of_series = [stats_list[i][k].tolist() for i in present]
        list_of_genera = [range(len(stats_list[i])) for i in present]

        _save_series_overview(list_of_genera, list_of_series, k, k + '-all',
                              args.monitor_dir)

        # Subset only those series that are done (or the one that is the longest)
        longest = length_of_longest(list_of_series)
        keep = [
            pos for pos, series in enumerate(list_of_series)
            if len(series) == longest
        ]
        # Map back to stats_list positions before indexing `groups`; the
        # filtered positions differ from them whenever a run lacks the key.
        groups_longest_series = groups[[present[pos] for pos in keep]]
        plot.timeseries_mean_grouped([list_of_genera[pos] for pos in keep],
                                     [list_of_series[pos] for pos in keep],
                                     groups_longest_series,
                                     xlabel='generations',
                                     ylabel=k)
        plt.savefig(os.path.join(args.monitor_dir,
                                 k + '-all-series-mean-sd' + '.pdf'),
                    bbox_inches='tight')
        plt.close()

        if len(unique_groups) > 1:
            for g in unique_groups:
                gstr = '{0:02d}'.format(g)
                group_stats = [stats_list[i] for i in np.where(groups == g)[0]]
                group_series = [s[k].tolist() for s in group_stats if k in s]
                group_genera = [range(len(s)) for s in group_stats if k in s]
                # Skip groups in which no run recorded this key.
                if group_genera and group_series:
                    _save_series_overview(group_genera, group_series, k,
                                          k + '-group-' + gstr,
                                          args.monitor_dir)

        # Progress: overwrite the same console line until the last key.
        is_last = i_key + 1 == n_keys
        print(progress.format(i_key + 1, n_keys), end='\n' if is_last else '\r')
예제 #4
0
        print("Loaded " + str(i) + "/" + str(len(directories)) + ": " + d)
    except:
        print("No files found in: " + d)
# Apply invert_signs to every loaded state, for all keys.
invert_signs(algorithm_states, keys='all')

# Binary group id per state: 1 when the state's 'safe_mutation' entry is
# 'SUM', otherwise 0. Converted to a NumPy array so it can be indexed with a
# list of positions below.
groups = [0] * len(algorithm_states)
for i, s in enumerate(algorithm_states):
    if s['safe_mutation'] == 'SUM':
        groups[i] = 1
groups = np.array(groups)

# One grouped-median plot per return statistic.
for k in ['return_unp', 'return_avg', 'return_min', 'return_max']:

    list_of_series = [s['stats'][k] for s in algorithm_states]
    list_of_genera = [s['stats']['generations'] for s in algorithm_states]
    # Keep only the series whose length equals the value returned by
    # length_of_longest (presumably the maximal length - confirm in helper).
    l = length_of_longest(list_of_series)
    indices = [
        i for i, series in enumerate(list_of_series) if len(series) == l
    ]

    list_of_series = [list_of_series[i] for i in indices]
    list_of_genera = [list_of_genera[i] for i in indices]
    # Positions index algorithm_states directly here (no filtering above), so
    # they line up with `groups`.
    groups_longest_series = groups[indices]

    plot.timeseries_median_grouped(list_of_genera,
                                   list_of_series,
                                   groups_longest_series,
                                   xlabel='generations',
                                   ylabel=k)
    # NOTE(review): no plt.close() is visible after saving; confirm the figure
    # is closed (or reused on purpose) by the code that follows.
    plt.savefig(os.path.join(save_dir, 'analysis-01-' + k + '.pdf'),
                bbox_inches='tight')
예제 #5
0
def violinplots(stats, keys_to_plot, groups, result_dir):
    """Draw grouped violin plots of the final value of each key.

    For every key, the runs whose series has the maximal length (as reported
    by ``length_of_longest``) contribute their last non-NaN value. Values are
    plotted per group on the x-axis, with one hue per key, and saved as
    ``<last_key[:-4]>-final-distribution-boxplot-grouped.pdf`` in
    ``result_dir``.

    Args:
        stats: Per-run statistics; each element must support ``key in s`` and
            ``s[key].tolist()``.
        keys_to_plot: Non-empty sequence of keys. The first key without its
            last four characters selects the y-axis label.
        groups: NumPy array of group labels, one per element of ``stats``
            (assumed to be parallel to ``stats``).
        result_dir: Directory the PDF file is written to.
    """
    metric = keys_to_plot[0][:-4]
    if metric == 'return':
        ylabel = 'NLL'
    elif metric == 'accuracy':
        ylabel = 'Classification accuracy'
    else:
        # Fall back to the raw metric name instead of leaving ylabel unbound.
        ylabel = metric

    frames = []
    for k in keys_to_plot:
        # Track positions in `stats` so `groups` stays aligned even when some
        # runs do not contain this key.
        present = [i for i, s in enumerate(stats) if k in s]
        list_of_series = [stats[i][k].tolist() for i in present]
        longest = length_of_longest(list_of_series)
        kept = [(i, series) for i, series in zip(present, list_of_series)
                if len(series) == longest]
        key_groups = groups[[i for i, _ in kept]]

        # Final value = last entry that is not NaN.
        list_of_final = []
        for _, series in kept:
            values = np.array(series)
            list_of_final.append(values[~np.isnan(values)][-1])

        if k[-4:] == '_val':
            label = 'Validation (unperturbed)'
        else:
            # Keep only the first word of the looked-up label.
            # NOTE(review): an earlier comment said "two first label words",
            # but the code has always kept one; confirm which is intended.
            label = lookup_label(k, mode='supervised').split(' ')[0]

        for g in np.unique(key_groups):
            members = np.where(key_groups == g)[0]
            frames.append(pd.DataFrame({
                'final_val': [list_of_final[i] for i in members],
                'group': g,
                'label': label,
            }))

    if frames:
        df = pd.concat(frames, axis=0, ignore_index=True)
    else:
        df = pd.DataFrame([])

    fig, ax = plt.subplots()
    fig.set_size_inches(*plt.rcParams.get('figure.figsize'))
    # Axes-level call: the figure-level factorplot/catplot does not accept a
    # target `ax` and would leave this figure empty.
    sns.violinplot(ax=ax, x="group", y="final_val", hue="label", data=df)
    sns.despine(ax=ax, left=True)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=10)
    ax.set_xlabel('')
    ax.set_ylabel(ylabel)
    # Shrink the axes vertically to make room for the legend above them.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height * 0.9])
    ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1), ncol=3)
    fig.savefig(os.path.join(
        result_dir,
        keys_to_plot[-1][:-4] + '-final-distribution-boxplot-grouped' + '.pdf'),
                bbox_inches='tight')
    plt.close(fig)
예제 #6
0
def _final_values_of_longest(stats, key, groups, skip_nan):
    """Return final values and group labels of the longest series of ``key``."""
    # Track positions in `stats` so `groups` stays aligned even when some
    # runs do not contain this key.
    present = [i for i, s in enumerate(stats) if key in s]
    list_of_series = [stats[i][key].tolist() for i in present]
    longest = length_of_longest(list_of_series)
    kept = [(i, series) for i, series in zip(present, list_of_series)
            if len(series) == longest]
    finals = []
    for _, series in kept:
        if skip_nan:
            values = np.array(series)
            finals.append(values[~np.isnan(values)][-1])
        else:
            finals.append(series[-1])
    return finals, groups[[i for i, _ in kept]]


def _plot_final_cdf_and_boxplot(finals, groups, key, result_dir, color_start,
                                with_legend):
    """Save the per-group CDF plot and the box plot of final values of ``key``."""
    unique = np.unique(groups)
    colors = plt.cm.gnuplot(np.linspace(color_start, 1, len(unique)))

    # Empirical CDF per group.
    fig, ax = plt.subplots()
    xlabel = lookup_label(key, mode='supervised')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('CDF')
    columns = []
    for g, c in zip(unique, colors):
        members = np.where(groups == g)[0]
        group_finals = [finals[i] for i in members]
        ax.hist(group_finals,
                alpha=0.6,
                density=True,
                histtype='step',
                cumulative=True,
                linewidth=2,
                color=c)
        columns.append(pd.DataFrame({g: group_finals}))
    if with_legend:
        # 'upper left' is the matplotlib spelling of the intended corner;
        # 'northwest' is not a valid location string.
        ax.legend(list(unique), loc='upper left')
    fig.savefig(os.path.join(result_dir, key + '-final-distribution' + '.pdf'),
                bbox_inches='tight')
    plt.close(fig)

    # One column per group; shorter groups are padded with NaN.
    df = pd.concat(columns, axis=1) if columns else pd.DataFrame([])

    # Box plot, with boxes placed in a fixed order when all groups are known.
    fig, ax = plt.subplots()
    my_order = [
        r'Isotropic (fixed $\sigma$)', r'Isotropic', r'Separable (layer)',
        r'Separable (parameter)'
    ]
    positions = [my_order.index(e) for e in df.keys() if e in my_order]
    box_kwargs = {}
    if len(positions) == len(df.columns):
        # Only pass positions when every column has one; a shorter list makes
        # matplotlib reject the call.
        box_kwargs['positions'] = positions
    df.boxplot(ax=ax, rot=10, showfliers=True, **box_kwargs)
    ax.xaxis.grid(False)
    ax.set_xlabel('')
    ax.set_ylabel(xlabel)
    fig.savefig(os.path.join(result_dir,
                             key + '-final-distribution-boxplot' + '.pdf'),
                bbox_inches='tight')
    plt.close(fig)


def final_distribution(stats,
                       keys_to_plot,
                       groups,
                       result_dir,
                       include_val=True):
    """Plot the distribution of final values per group for every key.

    For each key, the runs whose series has the maximal length (as reported
    by ``length_of_longest``) contribute their last value. A per-group
    cumulative histogram and a box plot are saved to ``result_dir``. When
    ``include_val`` is set, the key ends in ``'_unp'`` and every run contains
    the matching ``'_val'`` key, the same two plots are produced for the
    validation key using the last non-NaN value of each series.

    Args:
        stats: Per-run statistics; each element must support ``key in s`` and
            ``s[key].tolist()``.
        keys_to_plot: Keys to plot.
        groups: NumPy array of group labels, one per element of ``stats``
            (assumed to be parallel to ``stats``).
        result_dir: Directory the PDF files are written to.
        include_val: Whether validation plots are attempted.
    """
    for k in keys_to_plot:
        k_val = k[:-4] + '_val'
        # Validation is plotted only for '_unp' keys that every run has a
        # validation series for.
        plot_val = include_val and all(
            k[-4:] == '_unp' and k_val in s for s in stats)

        finals, key_groups = _final_values_of_longest(stats, k, groups,
                                                      skip_nan=False)
        _plot_final_cdf_and_boxplot(finals, key_groups, k, result_dir,
                                    color_start=0, with_legend=True)

        if plot_val:
            finals, key_groups = _final_values_of_longest(stats, k_val, groups,
                                                          skip_nan=True)
            # NOTE(review): the validation CDF has never had a legend; kept
            # as-is, confirm whether that is intentional.
            _plot_final_cdf_and_boxplot(finals, key_groups, k_val, result_dir,
                                        color_start=0.1, with_legend=False)