Python lookup_label示例，utils.data_analysis.lookup_label Python示例

示例#1

0

显示文件

def timeseries(xdatas,
               ydatas,
               xlabel,
               ylabel,
               plotlabels=None,
               figsize=(6.4, 4.8),
               map_labels=False):
    """Plot the moving average of several time series into one new figure.

    Args:
        xdatas: Sequence of x-value sequences, one per series.
        ydatas: Sequence of y-value sequences, one per series. Each one is
            passed through `moving_average` before plotting.
        xlabel: Label of the x-axis (mapped through `lookup_label` when
            `map_labels` is truthy).
        ylabel: Label of the y-axis (used as given).
        plotlabels: Optional legend labels, one per series. When omitted the
            series are drawn without labels and no legend is added.
        figsize: Figure size in inches, forwarded to `plt.figure`.
        map_labels: Falsy to use labels verbatim; otherwise the value is
            forwarded as `mode` to `lookup_label`.
    """
    # Remember whether the caller supplied labels *before* filling in the
    # per-series default, so the legend is only drawn when it is meaningful.
    has_plotlabels = plotlabels is not None
    if map_labels:
        xlabel = lookup_label(xlabel, mode=map_labels)
        # NOTE(review): ylabel is not mapped here although the sibling
        # plotting functions map both axis labels -- confirm this is intended.
        if has_plotlabels:
            plotlabels = [
                lookup_label(plotlabel, mode=map_labels)
                for plotlabel in plotlabels
            ]
    if not has_plotlabels:
        plotlabels = [None] * len(xdatas)
    plt.figure(figsize=figsize)
    handles = []
    for xdata, ydata, plotlabel in zip(xdatas, ydatas, plotlabels):
        handles.extend(
            plt.plot(xdata, moving_average(ydata), label=plotlabel))
    if has_plotlabels:
        plt.legend(handles=handles, ncol=2, loc='best')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

示例#2

0

显示文件

def timeseries_median(xdatas,
                      ydatas,
                      xlabel,
                      ylabel,
                      figsize=(6.4, 4.8),
                      map_labels=False):
    """Plot moving averages of the pointwise median, max and min of series.

    Shorter series are right-padded with NaN to the length of the longest
    one, so that the NaN-aware statistics ignore the missing tail.

    Args:
        xdatas: Sequence of x-value sequences; the first entry returned by
            `get_longest_sublists` is used as the common x-axis.
        ydatas: Sequence of y-value sequences (possibly of unequal length).
        xlabel: Label of the x-axis.
        ylabel: Label of the y-axis.
        figsize: Figure size in inches, forwarded to `plt.figure`.
        map_labels: Falsy to use labels verbatim; otherwise the value is
            forwarded as `mode` to `lookup_label` for both axis labels.

    Raises:
        ValueError: If `ydatas` is empty.
    """
    if map_labels:
        xlabel = lookup_label(xlabel, mode=map_labels)
        ylabel = lookup_label(ylabel, mode=map_labels)
    length = max(len(series) for series in ydatas)
    # np.nan (not the np.NaN alias, which NumPy 2.0 removed) pads the tail.
    padded = np.array([
        list(series) + [np.nan] * (length - len(series))
        for series in ydatas
    ])
    yavgs = np.nanmedian(padded, 0)
    ymaxs = np.nanmax(padded, 0)
    ymins = np.nanmin(padded, 0)
    xdata = get_longest_sublists(xdatas)[0]
    # Honour the requested figure size (previously accepted but ignored).
    plt.figure(figsize=figsize)
    handles = []
    for values, name in ((yavgs, 'MA of median'),
                         (ymaxs, 'MA of max'),
                         (ymins, 'MA of min')):
        handles.extend(plt.plot(xdata, moving_average(values), label=name))
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend(handles=handles, loc='best')
示例#3

0

显示文件

def timeseries_mean_grouped(xdatas,
                            ydatas,
                            groups,
                            xlabel,
                            ylabel,
                            figsize=(6.4, 4.8),
                            points_in_plot=200,
                            map_labels=False):
    """Plot the per-group mean (with an "sd" band) of grouped time series.

    For every unique value in `groups`, the member series are NaN-padded to
    a common length, subsampled to roughly `points_in_plot` points and drawn
    with `sns.tsplot`.

    Args:
        xdatas: Sequence of x-value sequences, one per series.
        ydatas: Sequence of y-value lists, one per series.
        groups: numpy array with one group identifier per series.
        xlabel: Label of the x-axis.
        ylabel: Label of the y-axis (passed to `sns.tsplot` as `value`).
        figsize: Figure size in inches.
        points_in_plot: Target number of points per curve after subsampling.
        map_labels: Falsy to use labels verbatim; otherwise the value is
            forwarded as `mode` to `lookup_label` for both axis labels.
    """
    assert isinstance(groups, np.ndarray)
    if map_labels:
        xlabel = lookup_label(xlabel, mode=map_labels)
        ylabel = lookup_label(ylabel, mode=map_labels)
    sns.set(color_codes=True)
    plt.figure(figsize=figsize)
    legend = []
    unique_groups = np.unique(groups)
    n_groups = len(unique_groups)
    colors = plt.cm.tab20(np.linspace(0, 1, n_groups))
    sns.set_style("ticks")
    for g, c in zip(unique_groups, colors[0:n_groups]):
        # np.str_ subclasses str, so one isinstance check covers both; the
        # np.str alias used previously no longer exists (NumPy >= 1.24).
        if isinstance(g, str):
            gstr = g
        else:
            gstr = 'G{0:02d}'.format(g)
        legend.append(gstr)
        g_indices = np.where(groups == g)[0]
        ydatas_grouped = [ydatas[i] for i in g_indices]
        length = max(len(series) for series in ydatas_grouped)
        # Stride that reduces the longest series to ~points_in_plot points.
        nsub = int(length / points_in_plot) if points_in_plot < length else 1
        # Pad with np.nan (np.NaN was removed in NumPy 2.0).
        ydata = np.array([
            series + [np.nan] * (length - len(series))
            for series in ydatas_grouped
        ])
        # Subsample y
        ydata_subsampled = ydata[:, ::nsub] if np.prod(
            ydata.shape) > nsub else ydata
        x = get_longest_sublists([xdatas[i] for i in g_indices])[0]
        if isinstance(x, (range, list)):
            x = np.array(x)
        # Subsample x
        x_subsampled = x[::nsub] if len(x) > nsub else x
        # NOTE(review): sns.tsplot has been removed from recent seaborn
        # releases -- confirm the pinned seaborn version still provides it.
        ax = sns.tsplot(value=ylabel,
                        data=ydata_subsampled,
                        time=x_subsampled,
                        ci="sd",
                        estimator=np.mean,
                        color=c)
    # Only the Line2D children are used as legend handles.
    lines = [
        child for child in ax.get_children()
        if isinstance(child, mpl.lines.Line2D)
    ]
    plt.legend(handles=lines, labels=legend)
    plt.xlabel(xlabel)

示例#4

0

显示文件

def timeseries_final_distribution(datas,
                                  label,
                                  ybins='auto',
                                  figsize=(6.4, 4.8),
                                  map_labels=False):
    """Draw a histogram of the last value of every series in `datas`.

    Args:
        datas: Sequence of indexable series; element `[-1]` of each is used.
        label: Label of the x-axis.
        ybins: Forwarded as `bins` to `Axes.hist`.
        figsize: Figure size in inches.
        map_labels: Falsy to use `label` verbatim; otherwise the value is
            forwarded as `mode` to `lookup_label`.
    """
    axis_label = lookup_label(label, mode=map_labels) if map_labels else label
    final_values = []
    for series in datas:
        final_values.append(series[-1])
    _, axes = plt.subplots(figsize=figsize)
    axes.hist(final_values, bins=ybins)
    plt.xlabel(axis_label)
    plt.ylabel('Counts')

示例#5

0

显示文件

def timeseries_distribution(xdatas,
                            ydatas,
                            xlabel,
                            ylabel,
                            xbins=100,
                            ybins=100,
                            figsize=(6.4, 4.8),
                            map_labels=False):
    """Plot a normalised 2D histogram accumulated over all (x, y) series.

    Args:
        xdatas: Sequence of x-value sequences, one per series.
        ydatas: Sequence of y-value sequences, one per series.
        xlabel: Label of the x-axis.
        ylabel: Label of the y-axis.
        xbins: Number of edge points along x (or number of bins when all x
            values are identical).
        ybins: Number of edge points along y (or number of bins when all y
            values are identical).
        figsize: Figure size in inches.
        map_labels: Falsy to use labels verbatim; otherwise the value is
            forwarded as `mode` to `lookup_label` for both axis labels.
    """
    if map_labels:
        xlabel = lookup_label(xlabel, mode=map_labels)
        ylabel = lookup_label(ylabel, mode=map_labels)
    plt.rcParams['image.cmap'] = 'viridis'

    # Global extent over every series.
    x_hi = max([max(series) for series in xdatas])
    x_lo = min([min(series) for series in xdatas])
    y_hi = max([max(series) for series in ydatas])
    y_lo = min([min(series) for series in ydatas])

    # Explicit edges spanning all values; fall back to a plain bin count
    # along an axis whose data holds a single unique value.
    if x_hi == x_lo:
        xedges = xbins
    else:
        xedges = np.linspace(x_lo, x_hi, num=xbins)
    if y_hi == y_lo:
        yedges = ybins
    else:
        yedges = np.linspace(y_lo, y_hi, num=ybins)

    # Accumulate the histograms; the edges returned by one call are fed
    # into the next so that every series shares the same grid.
    counts = 0.0
    for series_x, series_y in zip(xdatas, ydatas):
        hist, xedges, yedges = np.histogram2d(series_x,
                                              series_y,
                                              bins=(xedges, yedges))
        counts = counts + hist
    counts = counts / counts.max()

    grid_x, grid_y = np.meshgrid(xedges, yedges)
    plt.subplots(figsize=figsize)
    plt.pcolormesh(grid_x, grid_y, counts.T, linewidth=0, rasterized=True)
    plt.colorbar()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

示例#6

0

显示文件

def violinplots(stats, keys_to_plot, groups, result_dir):
    """Save a grouped violin plot of the final values of several statistics.

    For every key, only the series that reached the longest observed length
    are kept; the last non-NaN value of each is collected and labelled with
    its group. All keys end up in one long-format DataFrame that is drawn
    with seaborn and written to
    `<result_dir>/<last key without its 4-char suffix>-final-distribution-boxplot-grouped.pdf`.

    Args:
        stats: Sequence of per-run containers supporting `k in s` and
            `s[k].tolist()`.
        keys_to_plot: Non-empty sequence of statistic names; the last four
            characters of each are treated as a suffix (e.g. `_val`).
        groups: Group identifier per run; must support `.copy()` and
            indexing with a list of integers (presumably a numpy array --
            verify against callers).
        result_dir: Directory the PDF is written to.
    """
    groups_org = groups.copy()

    # Derive the y-axis label from the first key's prefix. Unknown prefixes
    # fall back to the prefix itself instead of leaving `ylabel` unbound.
    metric = keys_to_plot[0][:-4]
    ylabel = {
        'return': 'NLL',
        'accuracy': 'Classification accuracy',
    }.get(metric, metric)

    frames = []
    for k in keys_to_plot:
        # Get data and subset only those series that are done (or the one
        # that is the longest).
        # NOTE(review): `indices` index the filtered list below but are also
        # used to index `groups_org`; these only line up when every entry of
        # `stats` contains `k` -- confirm.
        list_of_series = [s[k].tolist() for s in stats if k in s]
        longest = length_of_longest(list_of_series)
        indices = [
            i for i, series in enumerate(list_of_series)
            if len(series) == longest
        ]
        groups = groups_org[indices]
        list_of_final = []
        for i in indices:
            a = np.array(list_of_series[i])
            # Last value that is not NaN.
            list_of_final.append(a[~np.isnan(a)][-1])

        # The label depends only on the key, so compute it once per key.
        if k[-4:] == '_val':
            label = 'Validation (unperturbed)'
        else:
            # Keep only the first word of the mapped label (drops trailing
            # words such as "accuracy" or "NLL").
            label = lookup_label(k, mode='supervised').split(' ')[0]

        for g in np.unique(groups):
            g_indices = np.where(groups == g)[0]
            frames.append(
                pd.DataFrame({
                    'final_val': [list_of_final[i] for i in g_indices],
                    'group': g,
                    'label': label
                }))

    # Concatenate once instead of growing the frame inside the loops.
    if frames:
        df = pd.concat(frames, axis=0, ignore_index=True)
    else:
        df = pd.DataFrame([])

    fig, ax = plt.subplots()
    fig.set_size_inches(*plt.rcParams.get('figure.figsize'))
    # NOTE(review): sns.factorplot was renamed to catplot in later seaborn
    # releases -- confirm the pinned seaborn version still provides it.
    g = sns.factorplot(ax=ax,
                       x="group",
                       y="final_val",
                       hue="label",
                       data=df,
                       kind="violin",
                       legend=False)
    g.despine(left=True)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=10)
    ax.set_xlabel('')
    ax.set_ylabel(ylabel)
    # Shrink the axes vertically to make room for the legend above them.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height * 0.9])
    ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1), ncol=3)
    # The file name is derived from the last plotted key.
    fig.savefig(os.path.join(
        result_dir,
        keys_to_plot[-1][:-4] + '-final-distribution-boxplot-grouped' +
        '.pdf'),
                bbox_inches='tight')
    plt.close(fig)

示例#7

0

显示文件

def final_distribution(stats,
                       keys_to_plot,
                       groups,
                       result_dir,
                       include_val=True):
    """Save CDF and box plots of the final value of each statistic per group.

    For every key `k` two PDFs are written to `result_dir`:
    `<k>-final-distribution.pdf` (cumulative step histogram per group) and
    `<k>-final-distribution-boxplot.pdf`. When validation plots are enabled
    and available, the same pair is written for `<k minus suffix>_val`.

    Args:
        stats: Sequence of per-run containers supporting `k in s` and
            `s[k].tolist()`.
        keys_to_plot: Iterable of statistic names; the last four characters
            of each are treated as a suffix.
        groups: Group identifier per run; must support `.copy()` and
            indexing with a list of integers (presumably a numpy array --
            verify against callers).
        result_dir: Directory the PDFs are written to.
        include_val: Whether to also plot the matching `_val` statistic. It
            is only plotted for keys ending in `_unp` whose `_val`
            counterpart is present in every entry of `stats`.
    """
    groups_org = groups.copy()
    # Fixed left-to-right order of the known groups in the box plots.
    my_order = [
        r'Isotropic (fixed $\sigma$)', r'Isotropic', r'Separable (layer)',
        r'Separable (parameter)'
    ]
    for k in keys_to_plot:
        k_val = k[:-4] + '_val'
        # Validation plots need an `_unp` key and the `_val` series in
        # every run.
        plot_val = include_val and not any(
            k[-4:] != '_unp' or k_val not in s for s in stats)

        # Get data and subset only those series that are done (or the one
        # that is the longest).
        # NOTE(review): `indices` index the filtered list but are also used
        # to index `groups_org`; these only line up when every entry of
        # `stats` contains `k` -- confirm.
        list_of_series = [s[k].tolist() for s in stats if k in s]
        l = length_of_longest(list_of_series)
        indices = [
            i for i, series in enumerate(list_of_series) if len(series) == l
        ]
        groups = groups_org[indices]
        list_of_final = [list_of_series[i][-1] for i in indices]

        # Cumulative distribution of the final values, one curve per group.
        fig, ax = plt.subplots()
        xlabel = lookup_label(k, mode='supervised')
        ax.set_xlabel(xlabel)
        ax.set_ylabel('CDF')
        legend = []
        n_groups = len(np.unique(groups))
        df = pd.DataFrame([])
        colors = plt.cm.gnuplot(np.linspace(0, 1, n_groups))
        for g, c in zip(np.unique(groups), colors[0:n_groups]):
            g_indices = np.where(groups == g)[0]
            list_of_final_group = [list_of_final[i] for i in g_indices]
            ax.hist(list_of_final_group,
                    alpha=0.6,
                    density=True,
                    histtype='step',
                    cumulative=True,
                    linewidth=2,
                    color=c)
            legend.append(g)
            # One column per group; axis=1 concat pads unequal lengths.
            df_new = pd.DataFrame({g: list_of_final_group})
            df = pd.concat([df, df_new], axis=1)
        # 'upper left' is the matplotlib spelling of the MATLAB-style
        # 'northwest', which matplotlib does not accept as a location.
        ax.legend(legend, loc='upper left')
        fig.savefig(os.path.join(result_dir,
                                 k + '-final-distribution' + '.pdf'),
                    bbox_inches='tight')
        plt.close(fig)

        # Box plot of the same final values.
        fig, ax = plt.subplots()
        positions = [my_order.index(e) for e in df.keys() if e in my_order]
        df.boxplot(rot=10, positions=positions, showfliers=True)
        ax.xaxis.grid(False)
        ax.set_xlabel('')
        ax.set_ylabel(xlabel)
        fig.savefig(os.path.join(result_dir,
                                 k + '-final-distribution-boxplot' + '.pdf'),
                    bbox_inches='tight')
        plt.close(fig)

        if plot_val:
            list_of_series = [s[k_val].tolist() for s in stats if k_val in s]
            l = length_of_longest(list_of_series)
            indices = [
                i for i, series in enumerate(list_of_series)
                if len(series) == l
            ]
            groups = groups_org[indices]
            list_of_final = []
            for i in indices:
                a = np.array(list_of_series[i])
                # Last value that is not NaN.
                list_of_final.append(a[~np.isnan(a)][-1])

            # Cumulative distribution for the validation statistic.
            fig, ax = plt.subplots()
            xlabel = lookup_label(k_val, mode='supervised')
            ax.set_xlabel(xlabel)
            ax.set_ylabel('CDF')
            legend = []
            n_groups = len(np.unique(groups))
            df = pd.DataFrame([])
            colors = plt.cm.gnuplot(np.linspace(0.1, 1, n_groups))
            for g, c in zip(np.unique(groups), colors[0:n_groups]):
                g_indices = np.where(groups == g)[0]
                list_of_final_group = [list_of_final[i] for i in g_indices]
                ax.hist(list_of_final_group,
                        alpha=0.6,
                        density=True,
                        histtype='step',
                        cumulative=True,
                        linewidth=2,
                        color=c)
                legend.append(g)
                df_new = pd.DataFrame({g: list_of_final_group})
                df = pd.concat([df, df_new], axis=1)
            fig.savefig(os.path.join(result_dir,
                                     k_val + '-final-distribution' + '.pdf'),
                        bbox_inches='tight')
            plt.close(fig)

            # Box plot for the validation statistic.
            fig, ax = plt.subplots()
            positions = [my_order.index(e) for e in df.keys() if e in my_order]
            df.boxplot(rot=10, positions=positions, showfliers=True)
            ax.xaxis.grid(False)
            ax.set_xlabel('')
            ax.set_ylabel(xlabel)
            fig.savefig(os.path.join(
                result_dir, k_val + '-final-distribution-boxplot' + '.pdf'),
                        bbox_inches='tight')
            plt.close(fig)

示例#8

0

显示文件

def plot_stats(stats_file, chkpt_dir, wide_figure=True, map_labels=False):
    """Plot every column of a training-statistics table to its own PDF.

    The table is loaded with `load_stats`, two derived columns
    (`time_per_generation`, `parallel_fraction`) are added, and a centred
    10-sample moving average (`<column>_ma`) is computed for every column.
    Each column is then plotted as faint markers with its moving average
    drawn as a line. Columns whose name ends in `_<digits>` are treated as
    members of one multi-series and share a single figure.

    Args:
        stats_file: Passed to `load_stats`.
        chkpt_dir: Directory the PDFs are written to.
        wide_figure: Use a 16:9 figure when True, else 6.4 x 4.8 inches.
        map_labels: Falsy to use column names as y-labels/file names;
            otherwise the value is forwarded as `mode` to `lookup_label`.

    Returns:
        None. Returns without plotting when the statistics cannot be loaded.
    """

    def is_unnamed(name):
        return 'Unnamed' in name

    def is_part_of_multi_series(name):
        return name.split('_')[-1].isdigit()

    def is_moving_average(name):
        return name[-3:] == '_ma'

    # Plot settings
    plt.rc('font', family='sans-serif')
    plt.rc('xtick', labelsize='x-small')
    plt.rc('ytick', labelsize='x-small')
    if wide_figure:
        figsize = mpl.figure.figaspect(9 / 16)
    else:
        figsize = (6.4, 4.8)

    # Load data. Loading stays best-effort, but KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        stats = load_stats(stats_file)
    except Exception:
        return

    # Invert sign on negative returns (negative returns indicate a converted
    # minimization problem)
    if (np.array(stats['return_max']) < 0).all():
        for k in [
                'return_unp', 'return_avg', 'return_min', 'return_max',
                'return_val'
        ]:
            stats[k] = -stats[k]

    # Computations/Transformations
    psuedo_start_time = stats['walltimes'].diff().mean()
    # Add pseudo start time to all times
    abs_walltimes = stats['walltimes'] + psuedo_start_time
    # Append pseudo start time to top of series and compute differences
    stats['time_per_generation'] = pd.concat(
        [pd.Series(psuedo_start_time), abs_walltimes]).diff().dropna()
    stats['parallel_fraction'] = stats['workertimes'] / stats[
        'time_per_generation']

    # Compute moving averages
    for c in stats.columns:
        if not is_unnamed(c) and not is_moving_average(c):
            stats[c + '_ma'] = stats[c].rolling(window=10,
                                                min_periods=1,
                                                center=True,
                                                win_type=None).mean()

    # Plot each of the columns including moving average
    c_list = stats.columns.tolist()
    while c_list:
        c = c_list.pop()
        if is_unnamed(c) or is_moving_average(c):
            continue
        if is_part_of_multi_series(c):
            # Find all columns that share this column's prefix. The
            # moving-average test must look at the candidate `ci`, not at
            # `c`, otherwise `<prefix>_ma` columns slip into the series.
            prefix = c.split('_')[:-1]
            cis = {
                ci
                for ci in stats.columns
                if ci.split('_')[:-1] == prefix and not is_moving_average(ci)
            }
            nlines = len(cis)
            for ci in cis.difference({c}):
                # A sibling may already have been popped and plotted.
                if ci in c_list:
                    c_list.remove(ci)
            cis = sorted(cis)
            # NOTE(review): the prefix parts are joined without '_'
            # ('a_b_1' -> 'ab'); confirm this matches the keys expected by
            # lookup_label and the desired file name.
            c = ''.join(prefix)
            # Loop over them and plot into same plot
            fig, ax = plt.subplots(figsize=figsize)
            ax.set_prop_cycle('color',
                              plt.cm.tab20(np.linspace(0, 1, nlines)))
            for ci in cis:
                stats[ci].plot(ax=ax,
                               linestyle='None',
                               marker='.',
                               alpha=0.06,
                               label='_nolegend_')
            for ci in cis:
                stats[ci + '_ma'].plot(ax=ax, linestyle='-', label=ci)
            # Narrow the axes to make room for the legend on the right.
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
            ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        else:
            fig, ax = plt.subplots(figsize=figsize)
            # Plot the float-converted series; the conversion result used to
            # be discarded.
            stats[c].astype(float).plot(ax=ax,
                                        alpha=0.3,
                                        linestyle='None',
                                        marker='.',
                                        label='_nolegend_')
            # Restart the colour cycle so the moving average reuses the
            # colour of the raw markers.
            ax.set_prop_cycle(None)
            stats[c + '_ma'].plot(ax=ax, linestyle='-', label='_nolegend_')
        plt.xlabel('Iteration')
        if map_labels:
            c = lookup_label(c, mode=map_labels)
        plt.ylabel(c)
        fig.savefig(os.path.join(chkpt_dir, c + '.pdf'),
                    bbox_inches='tight')
        plt.close(fig)