示例#1
0
def runtimes_range(db,
                   output=None,
                   where=None,
                   nbins=25,
                   iqr=(0.25, 0.75),
                   **kwargs):
    """
    Label and finalise the min/max runtime distribution figure.

    The histogram rendering itself is disabled (see the commented-out
    code below), so ``db``, ``where``, ``nbins`` and ``iqr`` are accepted
    but not used; only the title, axis labels and legend are applied to
    the current matplotlib figure.

    Arguments:
        db: Unused while the histograms are disabled.
        output: Forwarded to viz.finalise().
        where: Unused while the histograms are disabled.
        nbins: Unused while the histograms are disabled.
        iqr: Unused while the histograms are disabled.
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    # Disabled histogram rendering, kept for reference:
    #
    #   data = [t[2:] for t in db.min_max_runtimes(where=where)]
    #   min_t, max_t = zip(*data)
    #
    #   lower = labmath.filter_iqr(min_t, *iqr)
    #   upper = labmath.filter_iqr(max_t, *iqr)
    #
    #   min_data = np.r_[lower, upper].min()
    #   max_data = np.r_[lower, upper].max()
    #   bins = np.linspace(min_data, max_data, nbins)
    #
    #   plt.hist(lower, bins, label="Min")
    #   plt.hist(upper, bins, label="Max")
    default_title = "Normalised distribution of min and max runtimes"
    plt.title(kwargs.pop("title", default_title))
    plt.xlabel("Runtime (normalised to mean)")
    plt.ylabel("Frequency")
    plt.legend(frameon=True)
    viz.finalise(output, **kwargs)
示例#2
0
def err_fn_speedups(db, err_fn, output=None, sort=False,
                    job="xval", **kwargs):
    """
    Plot speedup over the baseline of all classifiers for an err_fn.

    One line is drawn per entry of ``db.classification_classifiers``,
    plus a constant line at 1 labelled "ZeroR". The y axis is
    logarithmic.

    Arguments:
        db: Database exposing ``classification_classifiers`` and
            ``execute()``.
        err_fn: Error handler to select results for; also the default
            plot title.
        output: Forwarded to viz.finalise().
        sort: If true, each series is plotted in descending order.
        job: Job name to select results for.
        **kwargs: "title" overrides the title; everything else is
            forwarded to viz.finalise().
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Start with an empty series so that the baseline and the x limits
    # below are still defined when there are no classifiers to plot.
    performances = []
    for classifier in db.classification_classifiers:
        basename = ml.classifier_basename(classifier)
        performances = list(
            db.execute("SELECT speedup\n"
                       "FROM classification_results\n"
                       "WHERE job=? AND classifier=? AND err_fn=?",
                       (job, classifier, err_fn)))
        if sort:
            performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=basename)

    # Constant line at 1, as long as the last series that was plotted.
    plt.plot([1] * len(performances), "-", label="ZeroR")

    title = kwargs.pop("title", err_fn)
    ax.set_yscale("log")
    plt.title(title)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Test instances")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)
示例#3
0
def err_fn_speedups(db, err_fn, output=None, sort=False, job="xval", **kwargs):
    """
    Plot speedup over the baseline of all classifiers for an err_fn.

    One line is drawn per entry of ``db.classification_classifiers``,
    plus a constant line at 1 labelled "ZeroR". The y axis is
    logarithmic.

    Arguments:
        db: Database exposing ``classification_classifiers`` and
            ``execute()``.
        err_fn: Error handler to select results for; also the default
            plot title.
        output: Forwarded to viz.finalise().
        sort: If true, each series is plotted in descending order.
        job: Job name to select results for.
        **kwargs: "title" overrides the title; everything else is
            forwarded to viz.finalise().
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Start with an empty series so that the baseline and the x limits
    # below are still defined when there are no classifiers to plot.
    performances = []
    for classifier in db.classification_classifiers:
        basename = ml.classifier_basename(classifier)
        performances = list(
            db.execute(
                "SELECT speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)))
        if sort:
            performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=basename)

    # Constant line at 1, as long as the last series that was plotted.
    plt.plot([1] * len(performances), "-", label="ZeroR")

    title = kwargs.pop("title", err_fn)
    ax.set_yscale("log")
    plt.title(title)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Test instances")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)
示例#4
0
def classifier_speedups(db,
                        classifier,
                        output=None,
                        sort=False,
                        job="xval_classifiers",
                        **kwargs):
    """
    Plot speedup over the baseline of a classifier for each err_fn.

    One line is drawn per entry of ``db.err_fns``, with a horizontal
    black line at y=1.

    Arguments:
        db: Database exposing ``err_fns`` and ``execute()``.
        classifier: Classifier to select results for; its basename is
            used as the plot title.
        output: Forwarded to viz.finalise().
        sort: If true, each series is plotted in descending order.
        job: Job name to select results for.
        **kwargs: Forwarded to viz.finalise().
    """
    # Start with an empty series so that the x limits below are still
    # defined when there are no error handlers to plot.
    performances = []
    for err_fn in db.err_fns:
        performances = list(
            db.execute(
                "SELECT speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)))
        if sort:
            performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=err_fn)

    basename = ml.classifier_basename(classifier)
    plt.title(basename)
    plt.ylabel("Speedup")
    plt.xlabel("Test instances")
    plt.axhline(y=1, color="k")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)
示例#5
0
def pie(data, output=None, **kwargs):
    """
    Draw a pie chart from (label, value) pairs.

    Arguments:
        data: Iterable of (label, value) pairs.
        output: Forwarded to viz.finalise().
        **kwargs: Forwarded to viz.finalise().
    """
    wedge_labels, wedge_sizes = zip(*data)
    pie_options = {
        "labels": wedge_labels,
        "autopct": '%1.1f%%',
        "shadow": True,
        "startangle": 90,
    }
    plt.pie(wedge_sizes, **pie_options)
    viz.finalise(output, **kwargs)
示例#6
0
def err_fn_performance(db, output=None, job="xval", **kwargs):
    """
    Plot performance and speedup of each error handler as paired bars.

    For every entry of ``db.err_fns`` the database's GEOMEAN and
    CONFERROR SQL functions are evaluated (scaled by 100) over the
    classification results of ``job`` that are flagged illegal or
    refused. Each value is drawn as a bar with its confidence interval.

    Arguments:
        db: Database exposing ``err_fns`` and ``execute()``.
        output: Forwarded to viz.finalise().
        job: Job name to select results for.
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    err_fns = db.err_fns
    results = [
        db.execute(
            "SELECT\n"
            "    GEOMEAN(performance) * 100,\n"
            "    CONFERROR(performance, .95) * 100,\n"
            "    GEOMEAN(speedup) * 100,\n"
            "    CONFERROR(speedup, .95) * 100\n"
            "FROM classification_results\n"
            "WHERE job=? AND err_fn=? AND (illegal=1 or refused=1)",
            (job, err_fn)
        ).fetchone()
        for err_fn in err_fns
    ]

    perfs, perfErrors, speedups, speedupErrors = zip(*results)

    X = np.arange(len(err_fns))
    # Bar width.
    width = (.8 / (len(results[0]) - 1))

    # The error bar offsets (.5 and 1.5 bar widths) are measured from
    # the left edge of each bar, so ask for edge alignment explicitly
    # instead of relying on matplotlib's default alignment.
    plt.bar(X, perfs, width=width, align="edge",
            color=sns.color_palette("Reds", 1), label="Performance")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = plt.errorbar(X + .5 * width, perfs, fmt="none",
                              yerr=perfErrors, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    plt.bar(X + width, speedups, width=width, align="edge",
            color=sns.color_palette("Greens", 1), label="Speedup")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = plt.errorbar(X + 1.5 * width, speedups, fmt="none",
                              yerr=speedupErrors, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, err_fns)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    title = kwargs.pop("title", "Error handler performance for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
示例#7
0
    def bar3d(self,
              output=None,
              title=None,
              figsize=(5, 4),
              zlabel=None,
              zticklabels=None,
              rotation=None,
              **kwargs):
        """
        Plot the matrix as a 3D bar chart.

        A unit-footprint bar is drawn for every cell whose value is
        greater than zero, with the cell value as the bar height.

        Arguments:
            output: Forwarded to viz.finalise().
            title: Plot title; omitted if falsy.
            figsize: (width, height) of the figure in inches.
            zlabel: Z axis label; omitted if None.
            zticklabels: Z tick labels, placed at 0..len-1; omitted if
                None.
            rotation: Azimuth passed to ax.view_init(); the view is left
                unchanged if None.
            **kwargs: Forwarded to Axes3D.bar3d().
        """
        import matplotlib.pyplot as plt

        X, Y, dZ = [], [], []

        # Iterate over every point in space.
        for j, i in product(range(self.matrix.shape[0]),
                            range(self.matrix.shape[1])):
            if self.matrix[j][i] > 0:
                X.append(i)
                Y.append(j)
                dZ.append(self.matrix[j][i])

        num_vals = len(X)
        Z = np.zeros((num_vals, ))
        dX = np.ones((num_vals, ))
        dY = np.ones((num_vals, ))

        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.bar3d(X, Y, Z, dX, dY, dZ, **kwargs)

        # Set X axis labels
        ax.set_xticks(np.arange(len(self.c)))
        ax.set_xticklabels(self.c)
        ax.set_xlabel("Columns")

        # Set Y axis labels
        ax.set_yticks(np.arange(len(self.r)))
        ax.set_yticklabels(self.r)
        ax.set_ylabel("Rows")

        # Set Z axis labels
        if zlabel is not None:
            ax.set_zlabel(zlabel)
        if zticklabels is not None:
            ax.set_zticks(np.arange(len(zticklabels)))
            ax.set_zticklabels(zticklabels)

        # Set plot rotation.
        if rotation is not None:
            ax.view_init(azim=rotation)
        # Set plot title.
        if title:
            plt.title(title)
        plt.tight_layout()
        # Figure.set_size_inches() only takes the size (and "forward");
        # it has no "dpi" argument, so pass the size alone.
        plt.gcf().set_size_inches(*figsize)
        viz.finalise(output)
示例#8
0
def _performance_plot(output, labels, values, title, color=None, **kwargs):
    """
    Draw a box plot of performance values on a new figure.

    Arguments:
        output: Forwarded to viz.finalise().
        labels: X tick labels, drawn rotated by 90 degrees.
        values: Data passed to sns.boxplot().
        title: Plot title.
        color: Accepted but not used.
        **kwargs: Forwarded to viz.finalise().
    """
    axes = plt.figure().add_subplot(111)

    # Alternative rendering:
    # sns.violinplot(data=values, inner="quartile", linewidth=.5)
    sns.boxplot(data=values, linewidth=1, fliersize=1)

    axes.set_xticklabels(labels, rotation=90)
    plt.title(title)
    plt.ylabel("Performance")
    plt.ylim(ymin=0, ymax=1)
    viz.finalise(output, **kwargs)
示例#9
0
def _performance_plot(output, labels, values, title, color=None, **kwargs):
    """
    Draw a box plot of performance values on a new figure.

    Arguments:
        output: Forwarded to viz.finalise().
        labels: X tick labels, drawn rotated by 90 degrees.
        values: Data passed to sns.boxplot().
        title: Plot title.
        color: Accepted but not used.
        **kwargs: Forwarded to viz.finalise().
    """
    axes = plt.figure().add_subplot(111)

    # Alternative rendering:
    # sns.violinplot(data=values, inner="quartile", linewidth=.5)
    sns.boxplot(data=values, linewidth=1, fliersize=1)

    axes.set_xticklabels(labels, rotation=90)
    plt.title(title)
    plt.ylabel("Performance")
    plt.ylim(ymin=0, ymax=1)
    viz.finalise(output, **kwargs)
示例#10
0
def runtimes_histogram(runtimes, output=None, color=None, **kwargs):
    """
    Plot the distribution of runtimes with a dashed line at the mean.

    Arguments:
        runtimes: Sequence of runtime values.
        output: Forwarded to viz.finalise().
        color: Forwarded to sns.distplot().
        **kwargs: Forwarded to viz.finalise().
    """
    mean_runtime = np.mean(runtimes)
    axes = plt.figure().add_subplot(111)
    sns.distplot(runtimes, bins=40, kde_kws={"bw": .3}, color=color)

    # Mark the mean.
    axes.axvline(mean_runtime, color='0.25', linestyle='--')

    # Span exactly the observed range and hide the y ticks.
    lowest, highest = min(runtimes), max(runtimes)
    plt.xlim(lowest, highest)
    y_axis = plt.gca().axes.get_yaxis()
    y_axis.set_ticks([])

    plt.xlabel("Runtime (ms)")
    plt.locator_params(axis="x", nbins=6)
    viz.finalise(output, **kwargs)
示例#11
0
def confinterval_trend(sample_counts, confintervals, output=None,
                       vlines=None, **kwargs):
    """
    Plot confidence interval size against the number of samples.

    Arguments:
        sample_counts: X values; also define the x axis limits.
        confintervals: Y values; plotted multiplied by 100.
        output: Forwarded to viz.finalise().
        vlines: Optional iterable of x positions at which to draw dashed
            vertical lines. Defaults to none.
        **kwargs: Forwarded to viz.finalise().
    """
    # None stands in for "no lines" so that the default is not a shared
    # mutable list.
    if vlines is None:
        vlines = ()

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(sample_counts, [y * 100 for y in confintervals])
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    for vline in vlines:
        ax.axvline(vline, color='k', linestyle='--')
    plt.ylabel("95\\% CI / mean")
    plt.xlabel("Number of samples")
    plt.xlim(min(sample_counts), max(sample_counts))
    viz.finalise(output, **kwargs)
示例#12
0
def oracle_speedups(db, output=None, **kwargs):
    """
    Plot the values returned by db.oracle_speedups(), one per scenario.

    Arguments:
        db: Database exposing ``oracle_speedups()``, whose result
            provides ``values()``.
        output: Forwarded to viz.finalise().
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    # Materialise the values: on Python 3 ``values()`` of a dict is a
    # view, which matplotlib cannot plot as a sequence.
    Speedups = list(db.oracle_speedups().values())
    # Speedups = sorted(Speedups, reverse=True)
    X = np.arange(len(Speedups))

    plt.plot(X, Speedups)
    plt.xlim(0, len(X) - 1)
    title = kwargs.pop("title", "Attainable performance over baseline")
    plt.title(title)
    plt.xlabel("Scenarios")
    plt.ylabel("Speedup")
    viz.finalise(output, **kwargs)
示例#13
0
def oracle_speedups(db, output=None, **kwargs):
    """
    Plot the values returned by db.oracle_speedups(), one per scenario.

    Arguments:
        db: Database exposing ``oracle_speedups()``, whose result
            provides ``values()``.
        output: Forwarded to viz.finalise().
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    # Materialise the values: on Python 3 ``values()`` of a dict is a
    # view, which matplotlib cannot plot as a sequence.
    Speedups = list(db.oracle_speedups().values())
    # Speedups = sorted(Speedups, reverse=True)
    X = np.arange(len(Speedups))

    plt.plot(X, Speedups)
    plt.xlim(0, len(X) - 1)
    title = kwargs.pop("title", "Attainable performance over baseline")
    plt.title(title)
    plt.xlabel("Scenarios")
    plt.ylabel("Speedup")
    viz.finalise(output, **kwargs)
示例#14
0
def runtimes_histogram(runtimes, output=None, color=None, **kwargs):
    """
    Plot the distribution of runtimes with a dashed line at the mean.

    Arguments:
        runtimes: Sequence of runtime values.
        output: Forwarded to viz.finalise().
        color: Forwarded to sns.distplot().
        **kwargs: Forwarded to viz.finalise().
    """
    mean_runtime = np.mean(runtimes)
    axes = plt.figure().add_subplot(111)
    sns.distplot(runtimes, bins=40, kde_kws={"bw": .3}, color=color)

    # Mark the mean.
    axes.axvline(mean_runtime, color='0.25', linestyle='--')

    # Span exactly the observed range and hide the y ticks.
    lowest, highest = min(runtimes), max(runtimes)
    plt.xlim(lowest, highest)
    y_axis = plt.gca().axes.get_yaxis()
    y_axis.set_ticks([])

    plt.xlabel("Runtime (ms)")
    plt.locator_params(axis="x", nbins=6)
    viz.finalise(output, **kwargs)
示例#15
0
def runtimes_variance(db, output=None, min_samples=1, where=None, **kwargs):
    """
    Scatter plot of normalised confidence interval against mean runtime.

    A scratch table ``_temp`` is filled with the (scenario, params)
    pairs of ``runtime_stats`` that have at least ``min_samples``
    samples. For each pair, the mean runtime and the confidence interval
    divided by the mean are then plotted, with a logarithmic x axis.

    Arguments:
        db: Database exposing ``tables``, ``drop_table()`` and
            ``execute()``.
        output: Forwarded to viz.finalise().
        min_samples: Minimum ``num_samples`` for a pair to be included.
        where: Optional SQL condition AND-ed onto the selection.
            NOTE: it is spliced into the query verbatim, so it must only
            ever come from trusted code.
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    # Create temporary table of scenarios and params to use, ignoring
    # those with less than "min_samples" samples.
    if "_temp" in db.tables:
        db.drop_table("_temp")

    db.execute("CREATE TABLE _temp (\n"
               "    scenario TEXT,\n"
               "    params TEXT,\n"
               "    PRIMARY KEY (scenario,params)\n"
               ")")
    # Whatever happens while the table is in use, remove it afterwards
    # so that a failed query does not leave it behind.
    try:
        query = (
            "INSERT INTO _temp\n"
            "SELECT\n"
            "    scenario,\n"
            "    params\n"
            "FROM runtime_stats\n"
            "WHERE num_samples >= ?"
        )
        if where is not None:
            query += " AND " + where
        db.execute(query, (min_samples,))

        # Rows of (mean runtime, normalised interval), by ascending mean.
        rows = sorted(
            db.execute(
                "SELECT\n"
                "    AVG(runtime),\n"
                "    CONFERROR(runtime, .95) / AVG(runtime)\n"
                "FROM _temp\n"
                "LEFT JOIN runtimes\n"
                "    ON _temp.scenario=runtimes.scenario\n"
                "       AND _temp.params=runtimes.params\n"
                "GROUP BY _temp.scenario,_temp.params"
            ),
            key=lambda row: row[0])
    finally:
        db.execute("DROP TABLE _temp")

    X, Y = zip(*rows)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(X, Y)
    ax.set_xscale("log")

    title = kwargs.pop("title",
                       "Runtime variance as a function of mean runtime")
    plt.title(title)
    plt.ylabel("Normalised confidence interval")
    plt.xlabel("Runtime (ms)")
    plt.xlim(0, X[-1])
    plt.ylim(ymin=0)
    viz.finalise(output, **kwargs)
示例#16
0
def performance_vs_coverage(db, output=None, max_values=250, **kwargs):
    """
    Joint plot of the performance and coverage columns of param_stats.

    The ``coverage`` column is plotted on the x axis under the name
    "Legality", against "Performance" on the y axis. Both axes are
    limited to the range [0, 1].

    Arguments:
        db: Database exposing ``execute()``.
        output: Forwarded to viz.finalise().
        max_values: Accepted but not used.
        **kwargs: Forwarded to viz.finalise().
    """
    data = list(
        db.execute("SELECT "
                   "    performance AS performance, "
                   "    coverage "
                   "FROM param_stats"))
    frame = pandas.DataFrame(data, columns=("Performance", "Legality"))
    # Name the variables by keyword: seaborn no longer accepts them as
    # positional arguments.
    sns.jointplot(x="Legality",
                  y="Performance",
                  data=frame,
                  xlim=(0, 1),
                  ylim=(0, 1))
    viz.finalise(output, **kwargs)
示例#17
0
def performance_vs_coverage(db, output=None, max_values=250, **kwargs):
    """
    Joint plot of the performance and coverage columns of param_stats.

    The ``coverage`` column is plotted on the x axis under the name
    "Legality", against "Performance" on the y axis. Both axes are
    limited to the range [0, 1].

    Arguments:
        db: Database exposing ``execute()``.
        output: Forwarded to viz.finalise().
        max_values: Accepted but not used.
        **kwargs: Forwarded to viz.finalise().
    """
    data = list(
        db.execute(
            "SELECT "
            "    performance AS performance, "
            "    coverage "
            "FROM param_stats"
        )
    )
    frame = pandas.DataFrame(data, columns=("Performance", "Legality"))
    # Name the variables by keyword: seaborn no longer accepts them as
    # positional arguments.
    sns.jointplot(x="Legality", y="Performance", data=frame,
                  xlim=(0, 1), ylim=(0, 1))
    viz.finalise(output, **kwargs)
示例#18
0
    def bar3d(self, output=None, title=None, figsize=(5,4), zlabel=None,
              zticklabels=None, rotation=None, **kwargs):
        """
        Plot the matrix as a 3D bar chart.

        A unit-footprint bar is drawn for every cell whose value is
        greater than zero, with the cell value as the bar height.

        Arguments:
            output: Forwarded to viz.finalise().
            title: Plot title; omitted if falsy.
            figsize: (width, height) of the figure in inches.
            zlabel: Z axis label; omitted if None.
            zticklabels: Z tick labels, placed at 0..len-1; omitted if
                None.
            rotation: Azimuth passed to ax.view_init(); the view is left
                unchanged if None.
            **kwargs: Forwarded to Axes3D.bar3d().
        """
        import matplotlib.pyplot as plt
        # Not referenced by name. NOTE(review): presumably imported so
        # that the '3d' projection is registered on older matplotlib
        # releases - confirm before removing.
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

        X, Y, dZ = [], [], []

        # Iterate over every point in space.
        for j, i in product(range(self.matrix.shape[0]),
                            range(self.matrix.shape[1])):
            if self.matrix[j][i] > 0:
                X.append(i)
                Y.append(j)
                dZ.append(self.matrix[j][i])

        num_vals = len(X)
        Z = np.zeros((num_vals,))
        dX = np.ones((num_vals,))
        dY = np.ones((num_vals,))

        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.bar3d(X, Y, Z, dX, dY, dZ, **kwargs)

        # Set X axis labels
        ax.set_xticks(np.arange(len(self.c)))
        ax.set_xticklabels(self.c)
        ax.set_xlabel("Columns")

        # Set Y axis labels
        ax.set_yticks(np.arange(len(self.r)))
        ax.set_yticklabels(self.r)
        ax.set_ylabel("Rows")

        # Set Z axis labels
        if zlabel is not None:
            ax.set_zlabel(zlabel)
        if zticklabels is not None:
            ax.set_zticks(np.arange(len(zticklabels)))
            ax.set_zticklabels(zticklabels)

        # Set plot rotation.
        if rotation is not None:
            ax.view_init(azim=rotation)
        # Set plot title.
        if title:
            plt.title(title)
        plt.tight_layout()
        # Figure.set_size_inches() only takes the size (and "forward");
        # it has no "dpi" argument, so pass the size alone.
        plt.gcf().set_size_inches(*figsize)
        viz.finalise(output)
示例#19
0
def confinterval_trend(sample_counts,
                       confintervals,
                       output=None,
                       vlines=None,
                       **kwargs):
    """
    Plot confidence interval size against the number of samples.

    Arguments:
        sample_counts: X values; also define the x axis limits.
        confintervals: Y values; plotted multiplied by 100.
        output: Forwarded to viz.finalise().
        vlines: Optional iterable of x positions at which to draw dashed
            vertical lines. Defaults to none.
        **kwargs: Forwarded to viz.finalise().
    """
    # None stands in for "no lines" so that the default is not a shared
    # mutable list.
    if vlines is None:
        vlines = ()

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(sample_counts, [y * 100 for y in confintervals])
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    for vline in vlines:
        ax.axvline(vline, color='k', linestyle='--')
    plt.ylabel("95\\% CI / mean")
    plt.xlabel("Number of samples")
    plt.xlim(min(sample_counts), max(sample_counts))
    viz.finalise(output, **kwargs)
示例#20
0
def refused_params_by_vendor(db, output=None, **kwargs):
    """
    Bar chart of the percentage of refused params for each device vendor.

    Arguments:
        db: Database exposing ``execute()``.
        output: Forwarded to viz.finalise().
        **kwargs: Forwarded to viz.finalise().

    Returns:
        The queried rows of (vendor, ratio_refused), ordered by
        descending ratio. Vendors with no match in the joined subquery
        have a null ratio, which is plotted as 0.
    """
    data = [
        row for row in db.execute(
            "SELECT devices.vendor,"
            "    ratio_refused "
            "FROM devices LEFT JOIN ("
            "SELECT "
            "    devices.vendor AS opencl, "
            "    (Count(*) * 1.0 / ( "
            "        SELECT Count(*) "
            "        FROM runtime_stats "
            "        LEFT JOIN scenarios "
            "          ON runtime_stats.scenario=scenarios.id "
            "        LEFT JOIN devices AS dev "
            "          ON scenarios.device=dev.id "
            "        WHERE dev.vendor=devices.vendor "
            "    )) * 100 AS ratio_refused "
            "FROM refused_params "
            "LEFT JOIN scenarios "
            "  ON refused_params.scenario=scenarios.id "
            "LEFT JOIN devices "
            "  ON scenarios.device=devices.id "
            "GROUP BY devices.vendor COLLATE NOCASE )"
            "ON devices.vendor like opencl "
            "GROUP BY devices.vendor COLLATE NOCASE "
            "ORDER BY ratio_refused DESC"
        )
    ]

    labels, Y = zip(*data)
    # Plot missing ratios as zero.
    Y = [0 if not y else y for y in Y]
    X = np.arange(len(Y))

    fig, ax = plt.subplots()
    # Bars span X + .1 to X + .9 so that the ticks at X + .5 sit under
    # their centres. That only holds for edge-aligned bars, so request
    # the alignment explicitly.
    ax.bar(X + .1, Y, width=.8, align="edge")
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation=90)
    ax.set_ylabel("Ratio refused (\\%)")

    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    for tick in ax.xaxis.get_minor_ticks():
        tick.tick1line.set_markersize(0)
        tick.tick2line.set_markersize(0)
        tick.label1.set_horizontalalignment('center')

    viz.finalise(output, **kwargs)
    return data
示例#21
0
def runtimes_variance(db, output=None, min_samples=1, where=None, **kwargs):
    """
    Scatter plot of normalised confidence interval against mean runtime.

    A scratch table ``_temp`` is filled with the (scenario, params)
    pairs of ``runtime_stats`` that have at least ``min_samples``
    samples. For each pair, the mean runtime and the confidence interval
    divided by the mean are then plotted, with a logarithmic x axis.

    Arguments:
        db: Database exposing ``tables``, ``drop_table()`` and
            ``execute()``.
        output: Forwarded to viz.finalise().
        min_samples: Minimum ``num_samples`` for a pair to be included.
        where: Optional SQL condition AND-ed onto the selection.
            NOTE: it is spliced into the query verbatim, so it must only
            ever come from trusted code.
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    # Create temporary table of scenarios and params to use, ignoring
    # those with less than "min_samples" samples.
    if "_temp" in db.tables:
        db.drop_table("_temp")

    db.execute("CREATE TABLE _temp (\n"
               "    scenario TEXT,\n"
               "    params TEXT,\n"
               "    PRIMARY KEY (scenario,params)\n"
               ")")
    # Whatever happens while the table is in use, remove it afterwards
    # so that a failed query does not leave it behind.
    try:
        query = ("INSERT INTO _temp\n"
                 "SELECT\n"
                 "    scenario,\n"
                 "    params\n"
                 "FROM runtime_stats\n"
                 "WHERE num_samples >= ?")
        if where is not None:
            query += " AND " + where
        db.execute(query, (min_samples, ))

        # Rows of (mean runtime, normalised interval), by ascending mean.
        rows = sorted(
            db.execute("SELECT\n"
                       "    AVG(runtime),\n"
                       "    CONFERROR(runtime, .95) / AVG(runtime)\n"
                       "FROM _temp\n"
                       "LEFT JOIN runtimes\n"
                       "    ON _temp.scenario=runtimes.scenario\n"
                       "       AND _temp.params=runtimes.params\n"
                       "GROUP BY _temp.scenario,_temp.params"),
            key=lambda row: row[0])
    finally:
        db.execute("DROP TABLE _temp")

    X, Y = zip(*rows)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(X, Y)
    ax.set_xscale("log")

    title = kwargs.pop("title",
                       "Runtime variance as a function of mean runtime")
    plt.title(title)
    plt.ylabel("Normalised confidence interval")
    plt.xlabel("Runtime (ms)")
    plt.xlim(0, X[-1])
    plt.ylim(ymin=0)
    viz.finalise(output, **kwargs)
示例#22
0
def refused_params_by_vendor(db, output=None, **kwargs):
    """
    Bar chart of the percentage of refused params for each device vendor.

    Arguments:
        db: Database exposing ``execute()``.
        output: Forwarded to viz.finalise().
        **kwargs: Forwarded to viz.finalise().

    Returns:
        The queried rows of (vendor, ratio_refused), ordered by
        descending ratio. Vendors with no match in the joined subquery
        have a null ratio, which is plotted as 0.
    """
    data = [
        row for row in db.execute(
            "SELECT devices.vendor,"
            "    ratio_refused "
            "FROM devices LEFT JOIN ("
            "SELECT "
            "    devices.vendor AS opencl, "
            "    (Count(*) * 1.0 / ( "
            "        SELECT Count(*) "
            "        FROM runtime_stats "
            "        LEFT JOIN scenarios "
            "          ON runtime_stats.scenario=scenarios.id "
            "        LEFT JOIN devices AS dev "
            "          ON scenarios.device=dev.id "
            "        WHERE dev.vendor=devices.vendor "
            "    )) * 100 AS ratio_refused "
            "FROM refused_params "
            "LEFT JOIN scenarios "
            "  ON refused_params.scenario=scenarios.id "
            "LEFT JOIN devices "
            "  ON scenarios.device=devices.id "
            "GROUP BY devices.vendor COLLATE NOCASE )"
            "ON devices.vendor like opencl "
            "GROUP BY devices.vendor COLLATE NOCASE "
            "ORDER BY ratio_refused DESC")
    ]

    labels, Y = zip(*data)
    # Plot missing ratios as zero.
    Y = [0 if not y else y for y in Y]
    X = np.arange(len(Y))

    fig, ax = plt.subplots()
    # Bars span X + .1 to X + .9 so that the ticks at X + .5 sit under
    # their centres. That only holds for edge-aligned bars, so request
    # the alignment explicitly.
    ax.bar(X + .1, Y, width=.8, align="edge")
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation=90)
    ax.set_ylabel("Ratio refused (\\%)")

    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    for tick in ax.xaxis.get_minor_ticks():
        tick.tick1line.set_markersize(0)
        tick.tick2line.set_markersize(0)
        tick.label1.set_horizontalalignment('center')

    viz.finalise(output, **kwargs)
    return data
示例#23
0
    def heatmap(self,
                output=None,
                title=None,
                figsize=(5, 4),
                xlabels=True,
                ylabels=True,
                cbar=True,
                **kwargs):
        """
        Plot the matrix as a heatmap, with the row order reversed.

        Arguments:
            output: Forwarded to viz.finalise().
            title: Plot title; omitted if falsy.
            figsize: (width, height) of the figure in inches.
            xlabels: If True, label the columns whose value in self.c is
                a multiple of 20 and leave the others blank. Any other
                value is handed to seaborn as ``xticklabels``.
            ylabels: As ``xlabels``, for the reversed values of self.r
                and ``yticklabels``.
            cbar: Forwarded to sns.heatmap().
            **kwargs: Forwarded to sns.heatmap(); "square" defaults to
                True if not given.
        """
        import matplotlib.pyplot as plt
        import seaborn as sns

        # Flip the rows, to match the reversed y labels below.
        new_order = list(reversed(range(self.matrix.shape[0])))
        data = self.matrix[:][new_order]

        kwargs.setdefault("square", True)

        # Test identity, not equality: comparing an array of labels
        # against True with == gives an array, whose truth value is
        # ambiguous.
        if xlabels is True:
            xticklabels = ["" if x % 20 else str(x) for x in self.c]
        else:
            xticklabels = xlabels
        if ylabels is True:
            yticklabels = [
                "" if x % 20 else str(x) for x in list(reversed(self.r))
            ]
        else:
            yticklabels = ylabels

        _, ax = plt.subplots(figsize=figsize)
        sns.heatmap(data,
                    xticklabels=xticklabels,
                    yticklabels=yticklabels,
                    cbar=cbar,
                    **kwargs)

        # Set labels.
        ax.set_ylabel("Rows")
        ax.set_xlabel("Columns")
        if title:
            plt.title(title)

        plt.tight_layout()
        # Figure.set_size_inches() only takes the size (and "forward");
        # it has no "dpi" argument, so pass the size alone.
        plt.gcf().set_size_inches(*figsize)

        viz.finalise(output)
示例#24
0
def max_speedups(db, output=None, **kwargs):
    """
    Plot the three speedup series of db.max_and_static_speedups.

    Each entry of ``db.max_and_static_speedups`` is unpacked into three
    values, which are drawn as the "Max", "$w_{(4 \\times 4)}$" and
    "$w_{(32 \\times 4)}$" lines on a logarithmic y axis.

    Arguments:
        db: Database exposing ``max_and_static_speedups``.
        output: Forwarded to viz.finalise().
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    best, small_wg, large_wg = zip(*db.max_and_static_speedups)
    scenario_ids = np.arange(len(best))

    axes = plt.figure().add_subplot(111)
    axes.plot(scenario_ids, best, "r", linestyle="--", label="Max")
    axes.plot(scenario_ids, small_wg, label="$w_{(4 \\times 4)}$")
    axes.plot(scenario_ids, large_wg, linestyle="-",
              label="$w_{(32 \\times 4)}$")
    # plt.ylim(ymin=0, ymax=100)
    last_index = len(scenario_ids) - 1
    plt.xlim(xmin=0, xmax=last_index)
    plt.title(kwargs.pop("title", "Max attainable speedups"))
    axes.set_yscale("log")
    plt.legend(frameon=True)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Scenarios (sorted by descending max speedup)")
    viz.finalise(output, **kwargs)
示例#25
0
def max_speedups(db, output=None, **kwargs):
    """
    Plot the three speedup series of db.max_and_static_speedups.

    Each entry of ``db.max_and_static_speedups`` is unpacked into three
    values, which are drawn as the "Max", "$w_{(4 \\times 4)}$" and
    "$w_{(32 \\times 4)}$" lines on a logarithmic y axis.

    Arguments:
        db: Database exposing ``max_and_static_speedups``.
        output: Forwarded to viz.finalise().
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    best, small_wg, large_wg = zip(*db.max_and_static_speedups)
    scenario_ids = np.arange(len(best))

    axes = plt.figure().add_subplot(111)
    axes.plot(scenario_ids, best, "r", linestyle="--", label="Max")
    axes.plot(scenario_ids, small_wg, label="$w_{(4 \\times 4)}$")
    axes.plot(scenario_ids, large_wg, linestyle="-",
              label="$w_{(32 \\times 4)}$")
    # plt.ylim(ymin=0, ymax=100)
    last_index = len(scenario_ids) - 1
    plt.xlim(xmin=0, xmax=last_index)
    plt.title(kwargs.pop("title", "Max attainable speedups"))
    axes.set_yscale("log")
    plt.legend(frameon=True)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Scenarios (sorted by descending max speedup)")
    viz.finalise(output, **kwargs)
示例#26
0
def performance_vs_max_wgsize(ratios, output=None, color=None, **kwargs):
    """
    Box plot of performance, with one box per entry of ``ratios``.

    The boxes are labelled as percentages: the n-th box (counting from
    one) is labelled ``n * multiplier`` followed by a percent sign.

    Arguments:
        ratios: Data passed to sns.boxplot().
        output: Forwarded to viz.finalise().
        color: Accepted but not used.
        **kwargs: "title", "multiplier" (default 10) and "xlabel"
            (default "") configure the plot; everything else is
            forwarded to viz.finalise().
    """
    # Take each plot option out of kwargs exactly once. Popping "title"
    # a second time would always return the fallback value and discard
    # the title chosen by the caller.
    title = kwargs.pop("title",
                       "Workgroup size performance vs. maximum workgroup size")
    multiplier = kwargs.pop("multiplier", 10)
    xlabel = kwargs.pop("xlabel", "")

    fig = plt.figure()
    ax = fig.add_subplot(111)

    sns.boxplot(data=ratios, linewidth=1, fliersize=1)
    # sns.violinplot(data=ratios, inner="quartile", linewidth=.5)

    ax.set_xticklabels([str((x + 1) * multiplier) + r'\%'
                        for x in np.arange(len(ratios))])

    plt.title(title)
    plt.ylim(ymin=0, ymax=1)
    plt.ylabel("Performance")
    plt.xlabel(xlabel)
    viz.finalise(output, **kwargs)
示例#27
0
def runtimes_range(db, output=None, where=None, nbins=25,
                   iqr=(0.25,0.75), **kwargs):
    """
    Label and finalise the min/max runtime distribution figure.

    The histogram rendering itself is disabled (see the commented-out
    code below), so ``db``, ``where``, ``nbins`` and ``iqr`` are accepted
    but not used; only the title, axis labels and legend are applied to
    the current matplotlib figure.

    Arguments:
        db: Unused while the histograms are disabled.
        output: Forwarded to viz.finalise().
        where: Unused while the histograms are disabled.
        nbins: Unused while the histograms are disabled.
        iqr: Unused while the histograms are disabled.
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    # Disabled histogram rendering, kept for reference:
    #
    #   data = [t[2:] for t in db.min_max_runtimes(where=where)]
    #   min_t, max_t = zip(*data)
    #
    #   lower = labmath.filter_iqr(min_t, *iqr)
    #   upper = labmath.filter_iqr(max_t, *iqr)
    #
    #   min_data = np.r_[lower, upper].min()
    #   max_data = np.r_[lower, upper].max()
    #   bins = np.linspace(min_data, max_data, nbins)
    #
    #   plt.hist(lower, bins, label="Min")
    #   plt.hist(upper, bins, label="Max")
    default_title = "Normalised distribution of min and max runtimes"
    plt.title(kwargs.pop("title", default_title))
    plt.xlabel("Runtime (normalised to mean)")
    plt.ylabel("Frequency")
    plt.legend(frameon=True)
    viz.finalise(output, **kwargs)
示例#28
0
def performance_vs_max_wgsize(ratios, output=None, color=None, **kwargs):
    """
    Box plot of performance, with one box per entry of ``ratios``.

    The boxes are labelled as percentages: the n-th box (counting from
    one) is labelled ``n * multiplier`` followed by a percent sign.

    Arguments:
        ratios: Data passed to sns.boxplot().
        output: Forwarded to viz.finalise().
        color: Accepted but not used.
        **kwargs: "title", "multiplier" (default 10) and "xlabel"
            (default "") configure the plot; everything else is
            forwarded to viz.finalise().
    """
    # Take each plot option out of kwargs exactly once. Popping "title"
    # a second time would always return the fallback value and discard
    # the title chosen by the caller.
    title = kwargs.pop(
        "title", "Workgroup size performance vs. maximum workgroup size")
    multiplier = kwargs.pop("multiplier", 10)
    xlabel = kwargs.pop("xlabel", "")

    fig = plt.figure()
    ax = fig.add_subplot(111)

    sns.boxplot(data=ratios, linewidth=1, fliersize=1)
    # sns.violinplot(data=ratios, inner="quartile", linewidth=.5)

    ax.set_xticklabels(
        [str((x + 1) * multiplier) + r'\%' for x in np.arange(len(ratios))])

    plt.title(title)
    plt.ylim(ymin=0, ymax=1)
    plt.ylabel("Performance")
    plt.xlabel(xlabel)
    viz.finalise(output, **kwargs)
示例#29
0
def num_params_vs_accuracy(db, output=None, where=None, **kwargs):
    """
    Plot the running total of the oracle param frequencies.

    The normalised frequencies from ``db.oracle_param_frequencies()``
    are sorted in descending order, scaled by 100 and accumulated; the
    running total is plotted against its position.

    Arguments:
        db: Database exposing ``oracle_param_frequencies()``.
        output: Forwarded to viz.finalise().
        where: Accepted but not used.
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    frequencies = db.oracle_param_frequencies(normalise=True).values()

    # Running total of the percentages, largest contribution first.
    cumulative = []
    running_total = 0
    for frequency in sorted(frequencies, reverse=True):
        running_total += frequency * 100
        cumulative.append(running_total)

    positions = np.arange(len(cumulative))
    axes = plt.subplot(111)
    axes.plot(positions, cumulative)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    plt.xlim(xmin=0, xmax=len(positions) - 1)
    plt.ylim(ymin=0, ymax=100)
    default_title = "Number of workgroup sizes vs. oracle accuracy"
    plt.title(kwargs.pop("title", default_title))
    plt.ylabel("Accuracy")
    plt.xlabel("Number of distinct workgroup sizes")
    plt.legend(frameon=True)
    viz.finalise(output, **kwargs)
示例#30
0
def refused_params_by_device(db, output=None, **kwargs):
    """
    Bar chart of the percentage of refused params for each device.

    Device ids are formatted with fmtdevid() for the tick labels and the
    ratios are rounded to two decimal places.

    Arguments:
        db: Database exposing ``execute()``.
        output: Forwarded to viz.finalise().
        **kwargs: Forwarded to viz.finalise().
    """
    data = [
        (fmtdevid(row[0]), round(row[1], 2))
        for row in db.execute(
                "SELECT "
                "    devices.id AS device, "
                "    (Count(*) * 1.0 / ( "
                "        SELECT Count(*) "
                "        FROM runtime_stats "
                "        LEFT JOIN scenarios "
                "          ON runtime_stats.scenario=scenarios.id "
                "        WHERE scenarios.device=devices.id "
                "    )) * 100 AS ratio_refused "
                "FROM refused_params "
                "LEFT JOIN scenarios "
                "  ON refused_params.scenario=scenarios.id "
                "LEFT JOIN devices "
                "  ON scenarios.device=devices.id "
                "GROUP BY devices.id "
                "ORDER BY ratio_refused DESC"
        )
    ]

    labels, Y = zip(*data)
    X = np.arange(len(Y))

    fig, ax = plt.subplots()
    # Bars span X + .1 to X + .9 so that the ticks at X + .5 sit under
    # their centres. That only holds for edge-aligned bars, so request
    # the alignment explicitly.
    ax.bar(X + .1, Y, width=.8, align="edge")
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation=90)
    ax.set_ylabel("Ratio refused (\\%)")

    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    for tick in ax.xaxis.get_minor_ticks():
        tick.tick1line.set_markersize(0)
        tick.tick2line.set_markersize(0)
        tick.label1.set_horizontalalignment('center')

    viz.finalise(output, **kwargs)
示例#31
0
def runtime_regression(db, output=None, job="xval", **kwargs):
    """
    Plot accuracy of a classifier at predicted runtime.

    For each entry of ``db.regression_classifiers`` the (actual,
    predicted) rows are sorted by descending actual value. Predictions
    are drawn as a line for "ZeroR" and as scatter points for every
    other classifier. The actual values of the last classifier are drawn
    as the "Actual" line.

    Arguments:
        db: Database exposing ``regression_classifiers`` and
            ``execute()``.
        output: Forwarded to viz.finalise().
        job: Job name to select results for.
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    axes = plt.figure().add_subplot(111)
    palette = sns.color_palette()

    query = ("SELECT\n"
             "    actual,\n"
             "    predicted\n"
             "FROM runtime_regression_results\n"
             "WHERE job=? AND classifier=?")

    # Values used after the loop when there are no classifiers.
    i, actual = 0, []

    for i, classifier in enumerate(db.regression_classifiers):
        basename = ml.classifier_basename(classifier)
        rows = list(db.execute(query, (job, classifier)))
        rows.sort(key=lambda row: row[0], reverse=True)
        actual, predicted = zip(*rows)

        series_color = palette[i - 1]
        if basename == "ZeroR":
            axes.plot(predicted, label=basename, color=series_color)
        else:
            positions = np.arange(len(predicted))
            axes.scatter(positions, predicted, label=basename,
                         color=series_color)

    axes.plot(actual, label="Actual", color=palette[i])
    axes.set_yscale("log")
    plt.xlim(0, len(actual))
    plt.legend()
    plt.title(kwargs.pop("title", "Runtime regression for " + job))
    plt.xlabel("Test instances (sorted by descending runtime)")
    plt.ylabel("Runtime (ms, log)")
    viz.finalise(output, **kwargs)
示例#32
0
def classifier_speedups(db, classifier, output=None, sort=False,
                        job="xval_classifiers", **kwargs):
    """
    Plot speedup over the baseline of a classifier for each err_fn.

    One line is drawn per entry of ``db.err_fns``, with a horizontal
    black line at y=1.

    Arguments:
        db: Database exposing ``err_fns`` and ``execute()``.
        classifier: Classifier to select results for; its basename is
            used as the plot title.
        output: Forwarded to viz.finalise().
        sort: If true, each series is plotted in descending order.
        job: Job name to select results for.
        **kwargs: Forwarded to viz.finalise().
    """
    # Start with an empty series so that the x limits below are still
    # defined when there are no error handlers to plot.
    performances = []
    for err_fn in db.err_fns:
        performances = list(
            db.execute("SELECT speedup\n"
                       "FROM classification_results\n"
                       "WHERE job=? AND classifier=? AND err_fn=?",
                       (job, classifier, err_fn)))
        if sort:
            performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=err_fn)

    basename = ml.classifier_basename(classifier)
    plt.title(basename)
    plt.ylabel("Speedup")
    plt.xlabel("Test instances")
    plt.axhline(y=1, color="k")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)
示例#33
0
def num_params_vs_accuracy(db, output=None, where=None, **kwargs):
    """
    Plot the running total of the oracle param frequencies.

    The normalised frequencies from ``db.oracle_param_frequencies()``
    are sorted in descending order, scaled by 100 and accumulated; the
    running total is plotted against its position.

    Arguments:
        db: Database exposing ``oracle_param_frequencies()``.
        output: Forwarded to viz.finalise().
        where: Accepted but not used.
        **kwargs: "title" overrides the default title; everything else
            is forwarded to viz.finalise().
    """
    frequencies = db.oracle_param_frequencies(normalise=True).values()

    # Running total of the percentages, largest contribution first.
    cumulative = []
    running_total = 0
    for frequency in sorted(frequencies, reverse=True):
        running_total += frequency * 100
        cumulative.append(running_total)

    positions = np.arange(len(cumulative))
    axes = plt.subplot(111)
    axes.plot(positions, cumulative)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    plt.xlim(xmin=0, xmax=len(positions) - 1)
    plt.ylim(ymin=0, ymax=100)
    default_title = "Number of workgroup sizes vs. oracle accuracy"
    plt.title(kwargs.pop("title", default_title))
    plt.ylabel("Accuracy")
    plt.xlabel("Number of distinct workgroup sizes")
    plt.legend(frameon=True)
    viz.finalise(output, **kwargs)
示例#34
0
def runtime_regression(db, output=None, job="xval", **kwargs):
    """
    Plot predicted runtimes of each regression classifier against actual.

    For every classifier in db.regression_classifiers the (actual,
    predicted) pairs are sorted by descending actual runtime. "ZeroR" is
    drawn as a line and every other classifier as a scatter. The actual
    runtimes of the last classifier processed are drawn as a line
    labelled "Actual". The y axis is logarithmic.

    Arguments:
        db: Database object providing `regression_classifiers` and
            `execute()`.
        output: Forwarded to viz.finalise().
        job: Value matched against the `job` column.
        **kwargs: "title" overrides the plot title; the rest is
            forwarded to viz.finalise().
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)

    palette = sns.color_palette()
    index, actual = 0, []

    query = ("SELECT\n"
             "    actual,\n"
             "    predicted\n"
             "FROM runtime_regression_results\n"
             "WHERE job=? AND classifier=?")

    for index, classifier in enumerate(db.regression_classifiers):
        basename = ml.classifier_basename(classifier)
        rows = sorted(db.execute(query, (job, classifier)),
                      key=lambda row: row[0], reverse=True)
        actual, predicted = zip(*rows)

        # Classifier colours are offset by one, so the colour at the final
        # index stays free for the "Actual" series below.
        color = palette[index - 1]
        if basename == "ZeroR":
            axes.plot(predicted, label=basename, color=color)
        else:
            axes.scatter(np.arange(len(predicted)), predicted,
                         label=basename, color=color)

    axes.plot(actual, label="Actual", color=palette[index])
    axes.set_yscale("log")
    plt.xlim(0, len(actual))
    plt.legend()
    plt.title(kwargs.pop("title", "Runtime regression for " + job))
    plt.xlabel("Test instances (sorted by descending runtime)")
    plt.ylabel("Runtime (ms, log)")
    viz.finalise(output, **kwargs)
示例#35
0
文件: visualise.py 项目: SpringRi/phd
def num_params(db, output=None, sample_range=None, **kwargs):
    """
    Plot the percentage of scenarios having at least N parameter values.

    For every N in the inclusive sample range, the number of rows in
    scenario_stats with num_params >= N is expressed as a percentage of
    the total row count.

    Arguments:
        db: Database object providing `num_rows()` and `execute()`.
        output: Forwarded to viz.finalise().
        sample_range: (lower, upper) pair of param counts, inclusive.
            Defaults to (1, 100).
        **kwargs: "title" overrides the plot title; the rest is
            forwarded to viz.finalise().
    """
    # Range of param counts.
    lower, upper = sample_range or (1, 100)

    num_instances = db.num_rows("scenario_stats")

    # One point per sampled param count. The arrays used to be sized by
    # the number of scenarios while being indexed by param count, which
    # raised an IndexError whenever there were fewer scenarios than
    # `upper + 1`.
    counts = range(lower, upper + 1)
    X = np.arange(lower, upper + 1)
    Y = np.zeros(len(X))

    for idx, count in enumerate(counts):
        ratio = db.execute(
            "SELECT (Count(*) * 1.0 / ?) * 100 "
            "FROM scenario_stats WHERE num_params >= ?",
            (num_instances, count)).fetchone()[0]
        # A NULL result (presumably what the database returns for the
        # division when the table is empty) is plotted as zero.
        Y[idx] = ratio or 0

    title = kwargs.pop("title", "Parameter values count")
    plt.title(title)
    plt.xlabel("Number of parameters")
    plt.ylabel("Ratio of scenarios")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    plt.plot(X, Y)
    plt.xlim(lower, upper)
    viz.finalise(output, **kwargs)
示例#36
0
def num_params(db, output=None, sample_range=None, **kwargs):
    """
    Plot the percentage of scenarios having at least N parameter values.

    For every N in the inclusive sample range, the number of rows in
    scenario_stats with num_params >= N is expressed as a percentage of
    the total row count.

    Arguments:
        db: Database object providing `num_rows()` and `execute()`.
        output: Forwarded to viz.finalise().
        sample_range: (lower, upper) pair of param counts, inclusive.
            Defaults to (1, 100).
        **kwargs: "title" overrides the plot title; the rest is
            forwarded to viz.finalise().
    """
    # Range of param counts.
    lower, upper = sample_range or (1, 100)

    num_instances = db.num_rows("scenario_stats")

    # Size the series by the sampled range rather than by the number of
    # scenarios: indexing a scenario-sized array by param count raised an
    # IndexError when there were fewer scenarios than `upper + 1`.
    counts = range(lower, upper + 1)
    X = np.arange(lower, upper + 1)
    Y = np.zeros(len(X))

    for idx, count in enumerate(counts):
        ratio = db.execute("SELECT (Count(*) * 1.0 / ?) * 100 "
                           "FROM scenario_stats WHERE num_params >= ?",
                           (num_instances, count)).fetchone()[0]
        # A NULL result (presumably what the database returns for the
        # division when the table is empty) is plotted as zero.
        Y[idx] = ratio or 0

    title = kwargs.pop("title", "Parameter values count")
    plt.title(title)
    plt.xlabel("Number of parameters")
    plt.ylabel("Ratio of scenarios")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    plt.plot(X, Y)
    plt.xlim(lower, upper)
    viz.finalise(output, **kwargs)
示例#37
0
    def heatmap(self, output=None, title=None, figsize=(5,4),
                xlabels=True, ylabels=True, cbar=True, **kwargs):
        """
        Plot the matrix as a seaborn heatmap.

        Rows are drawn in reverse order. When `xlabels` / `ylabels` is
        True, tick labels are taken from self.c / self.r and only values
        divisible by 20 are shown.

        Arguments:
            output: Forwarded to viz.finalise().
            title: Optional plot title.
            figsize: (width, height) of the figure in inches.
            xlabels: True for the default labels, otherwise a value
                passed to seaborn as `xticklabels`.
            ylabels: True for the default labels, otherwise a value
                passed to seaborn as `yticklabels`.
            cbar: Forwarded to seaborn.heatmap().
            **kwargs: Forwarded to seaborn.heatmap(). "square" defaults
                to True.
        """
        import matplotlib.pyplot as plt
        import seaborn as sns

        # Reverse the row order for plotting.
        # NOTE(review): assumes self.matrix supports numpy-style indexing
        # with a list of row indices.
        new_order = list(reversed(range(self.matrix.shape[0])))
        data = self.matrix[:][new_order]

        kwargs.setdefault("square", True)

        if xlabels is True:
            xticklabels = ["" if x % 20 else str(x)
                           for x in self.c]
        else:
            xticklabels = xlabels
        if ylabels is True:
            yticklabels = ["" if x % 20 else str(x)
                           for x in list(reversed(self.r))]
        else:
            yticklabels = ylabels

        _, ax = plt.subplots(figsize=figsize)
        sns.heatmap(data,
                    xticklabels=xticklabels, yticklabels=yticklabels,
                    cbar=cbar, **kwargs)

        # Set labels.
        ax.set_ylabel("Rows")
        ax.set_xlabel("Columns")
        if title:
            plt.title(title)

        plt.tight_layout()
        # Figure.set_size_inches() has no `dpi` parameter; passing one
        # raises a TypeError on current matplotlib, so it is not passed.
        plt.gcf().set_size_inches(*figsize)

        viz.finalise(output)
示例#38
0
def refused_params_by_device(db, output=None, **kwargs):
    """
    Plot, per device, the percentage of refused params as a bar chart.

    The ratio is the number of refused_params rows for a device divided
    by the number of runtime_stats rows for that device, times 100,
    rounded to two decimal places. Devices are ordered by descending
    ratio.

    Arguments:
        db: Database object providing `execute()`.
        output: Forwarded to viz.finalise().
        **kwargs: Forwarded to viz.finalise().
    """
    query = (
        "SELECT "
        "    devices.id AS device, "
        "    (Count(*) * 1.0 / ( "
        "        SELECT Count(*) "
        "        FROM runtime_stats "
        "        LEFT JOIN scenarios "
        "          ON runtime_stats.scenario=scenarios.id "
        "        WHERE scenarios.device=devices.id "
        "    )) * 100 AS ratio_refused "
        "FROM refused_params "
        "LEFT JOIN scenarios "
        "  ON refused_params.scenario=scenarios.id "
        "LEFT JOIN devices "
        "  ON scenarios.device=devices.id "
        "GROUP BY devices.id "
        "ORDER BY ratio_refused DESC")
    data = [(fmtdevid(device), round(ratio, 2))
            for device, ratio in db.execute(query)]

    labels, ratios = zip(*data)
    positions = np.arange(len(ratios))

    _, axes = plt.subplots()
    axes.bar(positions + .1, ratios, width=.8)
    axes.set_xticks(positions + .5)
    axes.set_xticklabels(labels, rotation=90)
    axes.set_ylabel("Ratio refused (\\%)")

    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    # Hide the minor tick marks and centre their labels.
    for minor_tick in axes.xaxis.get_minor_ticks():
        minor_tick.tick1line.set_markersize(0)
        minor_tick.tick2line.set_markersize(0)
        minor_tick.label1.set_horizontalalignment('center')

    viz.finalise(output, **kwargs)
示例#39
0
def plot_speedups_with_clgen(benchmarks_data, clgen_data, suite="npb"):
  """
  Plot speedups of predictive models trained with and without clgen.

  Two models are cross-validated with leave-one-benchmark-out: one trained
  on the benchmarks only, one trained on benchmarks plus synthetic kernels.
  Speedup is computed relative to the ZeroR device, i.e. the device which
  is most frequently the oracle for benchmarks matching `suite`.

  Arguments:
    benchmarks_data: Path to the benchmarks CSV. The substrings "training"
      and "nvidia" in this path select grouping and plot shape.
    clgen_data: Path to the synthetic kernels CSV.
    suite: Benchmark suite name used to select benchmarks.

  Returns speedups (without and with).
  """
  # datasets: B - benchmarks, S - synthetics, BS - benchmarks + synthetics:
  B = pd.read_csv(benchmarks_data)
  B["group"] = ["B"] * len(B)

  S = pd.read_csv(clgen_data)
  S["group"] = ["S"] * len(S)

  BS = pd.concat((B, S))

  # find the ZeroR. This is the device which is most frequently optimal
  Bmask = B[B["benchmark"].str.contains(suite)]
  zeror = Counter(Bmask["oracle"]).most_common(1)[0][0]
  zeror_runtime = "runtime_" + zeror.lower()

  # get the names of the benchmarks, in the form: $suite-$version-$benchmark
  benchmark_names = sorted(set([
    re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)-", b).group(1)
    for b in B["benchmark"] if b.startswith(suite)
  ]))

  B_out, BS_out = [], []
  for benchmark in benchmark_names:
    clf = cgo13.model()
    features = get_cgo13_features
    # cross validate on baseline
    B_out += cgo13.leave_one_benchmark_out(clf, features, B, benchmark)
    # reset model
    clf = cgo13.model()
    # repeat cross-validation with synthetic kernels
    BS_out += cgo13.leave_one_benchmark_out(clf, features, BS, benchmark)

  # create results frame
  R_out = []
  for b, bs in zip(B_out, BS_out):
    # get runtimes of device using predicted device
    b_p_runtime = b["runtime_" + b["p"].lower()]
    bs_p_runtime = bs["runtime_" + bs["p"].lower()]

    # speedup is the ratio of runtime using the predicted device
    # over runtime using ZeroR device
    b["p_speedup"] = b_p_runtime / b[zeror_runtime]
    bs["p_speedup"] = bs_p_runtime / bs[zeror_runtime]

    if "training" in benchmarks_data:
      # $benchmark
      group = escape_benchmark_name(b["benchmark"])
    else:
      # $benchmark.$dataset
      group = re.sub(r"[^-]+-[0-9\.]+-([^-]+)-.+", r"\1",
                     b["benchmark"]) + "." + b["dataset"]
    b["group"] = group
    bs["group"] = group

    # set the training data type
    b["training"] = "Grewe et al."
    bs["training"] = "w. CLgen"

    R_out.append(b)
    R_out.append(bs)

  R = pd.DataFrame(R_out)

  b_mask = R["training"] == "Grewe et al."
  bs_mask = R["training"] == "w. CLgen"

  B_speedup = labmath.mean(R[b_mask].groupby(["group"])["p_speedup"].mean())
  BS_speedup = labmath.mean(R[bs_mask].groupby(["group"])["p_speedup"].mean())

  print("  #. benchmarks:                  ",
        len(set(B["benchmark"])), "kernels,", len(B), "observations")
  print("  #. synthetic:                   ",
        len(set(S["benchmark"])), "kernels,", len(S), "observations")
  print()
  print("  ZeroR device:                    {}".format(zeror))
  print()
  print("  Speedup of Grewe et al.:         {:.2f} x".format(B_speedup))
  print("  Speedup w. CLgen:                {:.2f} x".format(BS_speedup))

  # average bars. DataFrame.append() was removed in pandas 2.0, so the
  # extra rows are added with pd.concat() instead.
  averages = pd.DataFrame([
    {
      "group": "Average",
      "p_speedup": B_speedup,
      "training": "Grewe et al."
    },
    {
      "group": "Average",
      "p_speedup": BS_speedup,
      "training": "w. CLgen"
    },
  ])
  R = pd.concat((R, averages), ignore_index=True)

  R["p_speedup"] -= 1  # negative offset so that bars start at 1

  # colors
  palette = sns.cubehelix_palette(len(set(R["training"])),
                                  rot=-.4, light=.85, dark=.35)

  ax = sns.barplot(
      x="group", y="p_speedup", data=R, ci=None, hue="training",
      palette=palette)
  plt.ylabel("Speedup")
  plt.xlabel("")

  plt.axhline(y=0, color="k", lw=1)  # speedup line
  plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1,
              linestyle="--")  # average line

  ax.get_legend().set_title("")  # no legend title
  plt.legend(loc='upper right')
  ax.get_legend().draw_frame(True)

  # plot shape and size
  figsize = (9, 2.2)
  if "nvidia" in benchmarks_data:
    typecast = int
    plt.ylim(-1, 16)
  elif "training" in benchmarks_data:
    typecast = float
    figsize = (7, 3.2)
  else:
    typecast = float

  # counter negative offset:
  ax.set_yticklabels([typecast(i) + 1 for i in ax.get_yticks()])

  plt.setp(ax.get_xticklabels(), rotation=90)

  viz.finalise(figsize=figsize, tight=True)
  return B_speedup, BS_speedup
示例#40
0
    def trisurf(self, output=None, title=None, figsize=(5,4), zlabel=None,
                zticklabels=None, rotation=None, **kwargs):
        """
        Plot the matrix as a 3D triangulated surface.

        Every matrix cell becomes one point: x is the column index, y the
        row index and z the cell value. Axis ticks are labelled with the
        values of self.c (x) and self.r (y); the first value and every
        value divisible by 20 get a tick. Nothing is plotted if the
        matrix holds fewer than three values.

        Arguments:
            output: Forwarded to viz.finalise().
            title: Optional plot title.
            figsize: (width, height) of the figure in inches.
            zlabel: Optional Z axis label.
            zticklabels: Optional Z tick labels, placed at 0..len-1.
            rotation: Optional azimuth passed to ax.view_init().
            **kwargs: Forwarded to ax.plot_trisurf().
        """
        import matplotlib.pyplot as plt
        import matplotlib.cm as cm
        # Imported for its side effect only.
        # NOTE(review): presumably registers the '3d' projection on older
        # matplotlib releases - confirm before removing.
        from mpl_toolkits.mplot3d import Axes3D

        num_vals = self.matrix.shape[0] * self.matrix.shape[1]
        if num_vals < 3:
            io.error("Cannot create trisurf of", num_vals, "values")
            return

        X = np.zeros((num_vals,))
        Y = np.zeros((num_vals,))
        Z = np.zeros((num_vals,))

        # Iterate over every point in space.
        for j,i in product(range(self.matrix.shape[0]),
                           range(self.matrix.shape[1])):
            # Convert point to list index.
            index = j * self.matrix.shape[1] + i
            X[index] = i
            Y[index] = j
            Z[index] = self.matrix[j][i]

        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_trisurf(X, Y, Z, cmap=cm.jet, **kwargs)

        # Set X axis labels
        xticks = []
        xticklabels = []
        for i,c in enumerate(self.c):
            if not len(xticks) or c % 20 == 0:
                xticks.append(i)
                xticklabels.append(c)
        ax.set_xticks(xticks)
        ax.set_xticklabels(xticklabels)
        ax.set_xlabel("$w_c$")

        # Set Y axis labels. The Y coordinate is the row index, so the
        # labels come from self.r (they were mistakenly taken from
        # self.c, duplicating the X axis labels).
        yticks = []
        yticklabels = []
        for j,r in enumerate(self.r):
            if not len(yticks) or r % 20 == 0:
                yticks.append(j)
                yticklabels.append(r)
        ax.set_yticks(yticks)
        ax.set_yticklabels(yticklabels)
        ax.set_ylabel("$w_r$")

        # Set Z axis labels
        if zlabel is not None:
            ax.set_zlabel(zlabel)
        if zticklabels is not None:
            ax.set_zticks(np.arange(len(zticklabels)))
            ax.set_zticklabels(zticklabels)

        # Set plot rotation.
        if rotation is not None:
            ax.view_init(azim=rotation)
        # Set plot title.
        if title:
            plt.title(title)
        plt.tight_layout()
        # Figure.set_size_inches() has no `dpi` parameter; passing one
        # raises a TypeError on current matplotlib, so it is not passed.
        plt.gcf().set_size_inches(*figsize)
        viz.finalise(output)
示例#41
0
def speedup_classification(db, output=None, job="xval", **kwargs):
    """
    Plot performance of classification using speedup regression.

    For each classifier with results for `job`, three bars are drawn:
    percentage of correct results, mean speedup and mean performance
    (both scaled by 100). Error bars are drawn for speedup and
    performance.

    Arguments:
        db: Database object providing `execute()`.
        output: Forwarded to viz.finalise().
        job: Value matched against the `job` column.
        **kwargs: "title" overrides the plot title; the rest is
            forwarded to viz.finalise().
    """
    count_query = ("SELECT classifier,Count(*) AS count\n"
                   "FROM speedup_classification_results\n"
                   "WHERE job=? GROUP BY classifier")
    correct_query = ("SELECT\n"
                     "    (SUM(correct) / CAST(? AS FLOAT)) * 100\n"
                     "FROM speedup_classification_results\n"
                     "WHERE job=? AND classifier=?")
    stats_query = ("SELECT\n"
                   "    AVG(speedup) * 100,\n"
                   "    CONFERROR(speedup, .95) * 100,\n"
                   "    AVG(performance) * 100,\n"
                   "    CONFERROR(performance, .95) * 100\n"
                   "FROM speedup_classification_results\n"
                   "WHERE job=? AND classifier=?")

    # One row per classifier:
    # [label, correct, speedup, speedup err, perf, perf err].
    results = []
    for classifier, count in db.execute(count_query, (job, )):
        basename = ml.classifier_basename(classifier)
        correct = db.execute(correct_query,
                             (count, job, classifier)).fetchone()[0]
        stats = list(db.execute(stats_query, (job, classifier)).fetchone())
        results.append([basename, correct] + stats)

    # Transpose rows into one sequence per column.
    labels, correct, speedups, yerrs, perfs, perf_yerrs = zip(*results)

    X = np.arange(len(labels))
    # Bar width.
    width = (.8 / (len(results[0]) - 1))

    bars = ((1, correct, "Blues", "Accuracy"),
            (2, speedups, "Greens", "Speedup"),
            (3, perfs, "Oranges", "Performance"))
    for slot, values, palette_name, label in bars:
        plt.bar(X + slot * width,
                values,
                width=width,
                color=sns.color_palette(palette_name, 1),
                label=label)

    # Confidence intervals are plotted separately from the bars so that
    # their formatting can be controlled in full.
    intervals = ((2.5, speedups, yerrs),
                 (3.5, perfs, perf_yerrs))
    for slot, values, errors in intervals:
        _, caps, _ = plt.errorbar(X + slot * width,
                                  values,
                                  fmt="none",
                                  yerr=errors,
                                  capsize=3,
                                  ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, labels)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    plt.title(kwargs.pop(
        "title",
        "Classification results for " + job + " using speedup regression"))

    # The legend goes *beneath* the plot, which requires passing some
    # extra arguments through to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    legend = plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)
    viz.finalise(output, additional_artists=[legend], bbox_inches="tight",
                 **kwargs)
示例#42
0
def test_finalise_tight():
  """Check that viz.finalise() with tight=True writes the output file."""
  image_path = "/tmp/labm8.png"
  _MakeTestPlot()
  viz.finalise(image_path, tight=True)
  assert fs.exists(image_path)
  # Clean up the generated image.
  fs.rm(image_path)
示例#43
0
def _plot_coverage_safety_oracle(db, where, title, group, name):
    """
    Render the coverage, safety and oracle plots for a set of scenarios.

    Arguments:
        db: Database passed to the visualise functions.
        where: SQL condition selecting the scenarios to plot.
        title: Plot title.
        group: Output subdirectory, e.g. "devices".
        name: Output file name without extension.
    """
    plots = (("coverage", visualise.coverage),
             ("safety", visualise.safety),
             ("oracle", visualise.oracle_wgsizes))
    for subdir, plot in plots:
        output = fs.path(experiment.IMG_ROOT,
                         "{0}/{1}/{2}.png".format(subdir, group, name))
        plot(db, output, where=where, title=title)


def main():
    """
    Regenerate all experiment visualisations from the oracle database.

    Removes existing files under experiment.IMG_ROOT and
    experiment.TAB_ROOT, recreates the output directories, then renders:
    summary pie charts, per-scenario heatmaps / 3D bars / trisurfs,
    classification and regression job visualisations, whole-dataset plots,
    and per-device, per-kernel and per-dataset coverage, safety and oracle
    plots.

    Side effects: while building the per-scenario masks, rows are inserted
    into the refused_params table and committed once per scenario.
    """
    db = _db.Database(experiment.ORACLE_PATH)
    ml.start()

    # Delete any old stuff.
    fs.rm(experiment.IMG_ROOT + "/*")
    fs.rm(experiment.TAB_ROOT + "/*")

    # Make directories
    fs.mkdir(experiment.TAB_ROOT)
    for subdir in ("scenarios/bars", "scenarios/heatmap",
                   "scenarios/trisurf",
                   "coverage/devices", "coverage/kernels",
                   "coverage/datasets",
                   "safety/devices", "safety/kernels", "safety/datasets",
                   "oracle/devices", "oracle/kernels", "oracle/datasets"):
        fs.mkdir(fs.path(experiment.IMG_ROOT, subdir))

    # NOTE: the "sceanrios" spelling is part of the existing output file
    # names and is kept so that anything referencing them keeps working.
    # The by-kernel pie used to be rendered twice to the same path; it is
    # now rendered once.
    visualise.pie(db.num_scenarios_by_device,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_device"))
    visualise.pie(db.num_runtime_stats_by_device,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_device"))
    visualise.pie(db.num_scenarios_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_kernel,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_kernel"))

    # Per-scenario plots
    for row in db.scenario_properties:
        scenario,device,kernel,north,south,east,west,max_wgsize,width,height,tout = row
        title = ("{device}: {kernel}[{n},{s},{e},{w}]\n"
                 "{width} x {height} {type}s"
                 .format(device=text.truncate(device, 18), kernel=kernel,
                         n=north, s=south, e=east, w=west,
                         width=width, height=height, type=tout))
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/heatmap/{id}.png".format(id=scenario))
        space = _space.ParamSpace.from_dict(db.perf_scenario(scenario))
        max_c = min(25, len(space.c))
        max_r = min(25, len(space.r))
        space.reshape(max_c=max_c, max_r=max_r)

        # Heatmaps. The mask marks each zero-valued cell as illegal (-1)
        # or refused (1).
        mask = _space.ParamSpace(space.c, space.r)
        for j in range(len(mask.r)):
            for i in range(len(mask.c)):
                if space.matrix[j][i] == 0:
                    r, c = space.r[j], space.c[i]
                    # TODO: Get values from refused_params table.
                    if r * c >= max_wgsize:
                        # Illegal
                        mask.matrix[j][i] = -1
                    else:
                        # Refused
                        db.execute("INSERT OR IGNORE INTO refused_params VALUES(?,?)",
                                   (scenario, hash_params(c, r)))
                        space.matrix[j][i] = -1
                        mask.matrix[j][i] = 1

        db.commit()
        new_order = list(reversed(range(space.matrix.shape[0])))
        data = space.matrix[:][new_order]

        figsize=(12,6)

        _, ax = plt.subplots(1, 2, figsize=figsize, sharey=True)
        sns.heatmap(data, ax=ax[0], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        ax[0].set_title(title)

        new_order = list(reversed(range(mask.matrix.shape[0])))
        data = mask.matrix[:][new_order]

        sns.heatmap(data, ax=ax[1], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        # Set labels.
        ax[0].set_ylabel("Rows")
        ax[0].set_xlabel("Columns")
        ax[1].set_ylabel("Rows")
        ax[1].set_xlabel("Columns")

        viz.finalise(output)

        # 3D bars.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/bars/{id}.png".format(id=scenario))
        space.bar3d(output=output, title=title, zlabel="Performance",
                    rotation=45)

        # Trisurfs.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/trisurf/{id}.png".format(id=scenario))
        space.trisurf(output=output, title=title, zlabel="Performance",
                      rotation=45)

    #####################
    # ML Visualisations #
    #####################
    #features_tab(db, experiment.TAB_ROOT)

    visualise_classification_job(db, "xval")
    visualise_classification_job(db, "arch")
    visualise_classification_job(db, "xval_real")
    visualise_classification_job(db, "synthetic_real")

    # Runtime regression accuracy.
    visualise_regression_job(db, "xval")
    visualise_regression_job(db, "arch")
    visualise_regression_job(db, "xval_real")
    visualise_regression_job(db, "synthetic_real")

    # Whole-dataset plots
    visualise.runtimes_variance(db, fs.path(experiment.IMG_ROOT,
                                            "runtime_variance.png"),
                                min_samples=30)
    visualise.num_samples(db, fs.path(experiment.IMG_ROOT,
                                      "num_samples.png"))
    visualise.runtimes_range(db, fs.path(experiment.IMG_ROOT,
                                         "runtimes_range.png"))
    visualise.max_speedups(db, fs.path(experiment.IMG_ROOT,
                                       "max_speedups.png"))
    visualise.kernel_performance(db, fs.path(experiment.IMG_ROOT,
                                             "kernel_performance.png"))
    visualise.device_performance(db, fs.path(experiment.IMG_ROOT,
                                             "device_performance.png"))
    visualise.dataset_performance(db, fs.path(experiment.IMG_ROOT,
                                              "dataset_performance.png"))
    visualise.num_params_vs_accuracy(db, fs.path(experiment.IMG_ROOT,
                                                 "num_params_vs_accuracy.png"))
    visualise.performance_vs_coverage(db,
                                      fs.path(experiment.IMG_ROOT,
                                              "performance_vs_coverage.png"))
    visualise.performance_vs_max_wgsize(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_max_wgsize.png")
    )
    visualise.performance_vs_wgsize(db, fs.path(experiment.IMG_ROOT,
                                                "performance_vs_wgsize.png"))
    visualise.performance_vs_wg_c(db, fs.path(experiment.IMG_ROOT,
                                              "performance_vs_wg_c.png"))
    visualise.performance_vs_wg_r(db, fs.path(experiment.IMG_ROOT,
                                              "performance_vs_wg_r.png"))
    visualise.max_wgsizes(db, fs.path(experiment.IMG_ROOT, "max_wgsizes.png"))
    visualise.oracle_speedups(db, fs.path(experiment.IMG_ROOT,
                                          "oracle_speedups.png"))

    visualise.coverage(db,
                       fs.path(experiment.IMG_ROOT, "coverage/coverage.png"))
    visualise.safety(db, fs.path(experiment.IMG_ROOT, "safety/safety.png"))
    visualise.oracle_wgsizes(db, fs.path(experiment.IMG_ROOT, "oracle/all.png"))

    # Per-device plots: all kernels, then real-only and synthetic-only.
    for i,device in enumerate(db.devices):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE device='{0}')"
                 .format(device))
        _plot_coverage_safety_oracle(db, where, device, "devices",
                                     "{0}".format(i))

        for suffix, synthetic in (("real", 0), ("synthetic", 1)):
            where = ("scenario IN (\n"
                     "    SELECT id from scenarios WHERE device='{0}'\n"
                     ") AND scenario IN (\n"
                     "    SELECT id FROM scenarios WHERE kernel IN (\n"
                     "        SELECT id FROM kernel_names WHERE synthetic={1}\n"
                     "    )\n"
                     ")"
                     .format(device, synthetic))
            _plot_coverage_safety_oracle(db, where, device + ", " + suffix,
                                         "devices",
                                         "{0}_{1}".format(i, suffix))

    # Per-kernel plots. The oracle image used to be rendered with
    # visualise.safety(); it now uses visualise.oracle_wgsizes() like the
    # per-device plots. items() replaces the Python 2-only iteritems().
    for kernel,ids in db.lookup_named_kernels().items():
        id_wrapped = ['"' + kernel_id + '"' for kernel_id in ids]
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE kernel IN ({0}))"
                 .format(",".join(id_wrapped)))
        _plot_coverage_safety_oracle(db, where, kernel, "kernels",
                                     "{0}".format(kernel))

    # Per-dataset plots (same oracle fix as for the per-kernel plots).
    for i,dataset in enumerate(db.datasets):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE dataset='{0}')"
                 .format(dataset))
        _plot_coverage_safety_oracle(db, where, dataset, "datasets",
                                     "{0}".format(i))

    ml.stop()
示例#44
0
def classification(db, output=None, job="xval", **kwargs):
    """
    Plot classification results for a job as four stacked bar charts.

    The subplots show, per classifier: classification time, the ratios
    of illegal / refused / correct results, speedup for each err_fn, and
    performance for each err_fn. A "ZeroR" baseline row for the "4x4"
    params is added in front of the classifiers read from the database.

    Arguments:
        db: Database object providing `err_fns` and `execute()`.
        output: Forwarded to viz.finalise().
        job: Value matched against the `job` column.
        **kwargs: "title" overrides the plot title; the rest is
            forwarded to viz.finalise().
    """
    err_fns = db.err_fns
    num_err_fns = len(err_fns)
    base_err_fn = err_fns[0]
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier", (job, base_err_fn))
    results = []

    # Add baseline results.
    baseline = "4x4"
    correct = db.execute(
        "SELECT Count(*) * 1.0 / 3 FROM classification_results "
        "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?", (job, baseline))
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    # Each result row has six scalar columns, followed by one
    # (mean, error) speedup pair and then one (mean, error) performance
    # pair per err_fn. The baseline repeats the same pair for every
    # err_fn; this used to be hard-coded for exactly three err_fns.
    results.append(["ZeroR", correct, illegal, refused, time, terr] +
                   [speedup] * num_err_fns + [perf] * num_err_fns)

    # Get results
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone() for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone() for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] +
                       speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(
        *[(text.truncate(result[0], 40), result[1], result[2], result[3],
           result[4], result[5]) for result in results])

    X = np.arange(len(labels))

    # Column offsets of the per-err_fn pairs within a result row.
    speedup_offset = 6
    perf_offset = speedup_offset + num_err_fns

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5,
                             time,
                             fmt="none",
                             yerr=terr,
                             capsize=3,
                             ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1,
           illegal,
           width=width,
           color=sns.color_palette("Reds", 1),
           label="Illegal")
    ax.bar(X + .1 + width,
           refused,
           width=width,
           color=sns.color_palette("Oranges", 1),
           label="Refused")
    ax.bar(X + .1 + 2 * width,
           correct,
           width=width,
           color=sns.color_palette("Blues", 1),
           label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = (.8 / num_err_fns)
    colors = sns.color_palette("Greens", num_err_fns)
    for i, err_fn in enumerate(err_fns):
        pairs = [result[speedup_offset + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width),
               speedups,
               width=width,
               label=errfn2label(err_fn),
               color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width,
                                 speedups,
                                 fmt="none",
                                 yerr=yerrs,
                                 capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    # Ticks and labels are set with separate calls. The former extra
    # `ax.set_xticks(X + .4, labels)` call was dropped: the second
    # positional argument of Axes.set_xticks() is `minor` on older
    # matplotlib releases, so it modified the minor ticks instead.
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)

    # PERFORMANCE
    colors = sns.color_palette("Blues", num_err_fns)
    width = (.8 / num_err_fns)
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(err_fns):
        pairs = [result[perf_offset + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width),
               perfs,
               width=width,
               label=errfn2label(err_fn),
               color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width,
                                 perfs,
                                 fmt="none",
                                 yerr=yerrs,
                                 capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")

    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
示例#45
0
def regression_classification(db, output=None, job="xval",
                              table="runtime_classification_results",
                              **kwargs):
    """
    Plot performance of classification using runtime regression.

    For each job in a fixed table, one row of aggregates is selected from
    `table`. An "Average" column is appended and four vertically stacked
    bar charts are drawn: classification time, accuracy, speedup and
    performance.

    Arguments:
        db: Object whose execute(sql, params) returns something with a
            fetchone() method yielding the seven selected aggregates.
        output: Passed as the first argument to viz.finalise().
        job: Not read by this function; every job in the table below is
            always plotted.
        table: Table name substituted into the query text.
        **kwargs: Passed through to viz.finalise().
    """
    job_titles = {
        "xval": "10-fold",
        "synthetic_real": "Synthetic",
        "arch": "Device",
        "kern": "Kernel",
        "data": "Dataset",
    }

    sql = (
        "SELECT "
        "  AVG(speedup), CONFERROR(speedup, .95), "
        "  AVG(performance) * 100, CONFERROR(performance, .95) * 100, "
        "  AVG(time) + 2.5, CONFERROR(time, .95), "
        "  AVG(correct) * 100 "
        "FROM {} WHERE job=?"
    ).format(table)

    rows = []
    for job_name in job_titles:
        (mean_speedup, speedup_err, mean_perf, perf_err,
         mean_time, time_err, pct_correct) = db.execute(
             sql, (job_name,)).fetchone()
        rows.append([job_name, mean_speedup, speedup_err, mean_perf,
                     perf_err, mean_time, time_err, pct_correct])

    # One label per job, in table order, followed by the averages column.
    labels = list(job_titles.values())
    labels.append(r'\textbf{Average}')

    # Transpose the rows into columns, drop the leading job-name column
    # and extend every numeric column with its own mean.
    numeric_columns = list(zip(*rows))[1:]
    speedups, speedup_errs, perfs, perf_errs, times, time_errs, accuracy = [
        column + (labmath.mean(column),) for column in numeric_columns
    ]

    X = np.arange(len(labels))
    width = .8

    def add_error_bars(axes, heights, errors):
        # Confidence intervals are plotted separately from the bars so
        # that the cap markers can be restyled afterwards.
        _, cap_lines, _ = axes.errorbar(X + .5, heights, fmt="none",
                                        yerr=errors, capsize=3, ecolor="k")
        for cap_line in cap_lines:
            cap_line.set_color('k')
            cap_line.set_markeredgewidth(1)

    # Row 1 of 4: classification time.
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, times, width=width)
    ax.set_xticks(X + .5)
    ax.set_ylim(0, 150)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Classification time (ms)")
    add_error_bars(ax, times, time_errs)

    # Row 3 of 4: speedups.
    ax = plt.subplot(4, 1, 3)
    ax.bar(X + .1, speedups, width=width, color=sns.color_palette("Greens"))
    ax.set_xticks(X + .5)
    ax.set_ylim(0, 7)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Speedup")
    add_error_bars(ax, speedups, speedup_errs)

    # Row 4 of 4: performance, with a percentage formatter on the y axis.
    ax = plt.subplot(4, 1, 4)
    ax.bar(X + .1, perfs, width=width, color=sns.color_palette("Blues"))
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Performance")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 100)
    add_error_bars(ax, perfs, perf_errs)

    # Row 2 of 4: accuracy (no error bars are drawn for this one).
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, accuracy, width=width, color=sns.color_palette("Reds"))
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Accuracy")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 12)

    viz.finalise(output, **kwargs)
示例#46
0
def regression_classification(db,
                              output=None,
                              job="xval",
                              table="runtime_classification_results",
                              **kwargs):
    """
    Plot performance of classification using runtime regression.

    Reads one aggregate row per job from `table`, appends the mean of
    every column as an extra "Average" entry, and draws four stacked bar
    charts (time, accuracy, speedup, performance) before handing over to
    viz.finalise().

    Arguments:
        db: Provides execute(sql, params) whose result has fetchone().
        output: Forwarded to viz.finalise().
        job: Accepted but not read; the set of jobs plotted is fixed.
        table: Name of the table the aggregates are selected from.
        **kwargs: Forwarded to viz.finalise().
    """
    jobs = {
        "xval": "10-fold",
        "synthetic_real": "Synthetic",
        "arch": "Device",
        "kern": "Kernel",
        "data": "Dataset",
    }

    def fetch_aggregates(job_key):
        # Select the seven aggregates for a single job and prefix them
        # with the job key.
        speedup, serr, perf, perr, time, terr, correct = db.execute(
            "SELECT "
            "  AVG(speedup), CONFERROR(speedup, .95), "
            "  AVG(performance) * 100, CONFERROR(performance, .95) * 100, "
            "  AVG(time) + 2.5, CONFERROR(time, .95), "
            "  AVG(correct) * 100 "
            "FROM {} WHERE job=?".format(table), (job_key, )).fetchone()
        return [job_key, speedup, serr, perf, perr, time, terr, correct]

    def with_mean(values):
        # Return the tuple extended by the mean of its own elements.
        return values + (labmath.mean(values), )

    def paint_caps(cap_lines):
        # Force the error bar caps to solid black, one point wide.
        for cap_line in cap_lines:
            cap_line.set_color('k')
            cap_line.set_markeredgewidth(1)

    results = [fetch_aggregates(job_key) for job_key in jobs]

    # Zip into per-column tuples; the first column (job keys) is replaced
    # by the human-readable job titles.
    _, speedup, serr, perf, perr, time, terr, correct = zip(*results)
    labels = [title for title in jobs.values()]

    # Add averages.
    labels.append(r'\textbf{Average}')
    speedup = with_mean(speedup)
    serr = with_mean(serr)
    perf = with_mean(perf)
    perr = with_mean(perr)
    time = with_mean(time)
    terr = with_mean(terr)
    correct = with_mean(correct)

    X = np.arange(len(labels))
    bar_x = X + .1
    mid_x = X + .5
    width = .8

    # Plot confidence intervals separately so that we can have full
    # control over formatting; these options are shared by every panel.
    errorbar_style = dict(fmt="none", capsize=3, ecolor="k")
    percent = '%d\\%%'

    # PLOT TIMES
    ax = plt.subplot(4, 1, 1)
    ax.bar(bar_x, time, width=width)
    ax.set_xticks(mid_x)
    ax.set_ylim(0, 150)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Classification time (ms)")
    _, caps, _ = ax.errorbar(mid_x, time, yerr=terr, **errorbar_style)
    paint_caps(caps)

    # SPEEDUPS
    ax = plt.subplot(4, 1, 3)
    ax.bar(bar_x, speedup, width=width, color=sns.color_palette("Greens"))
    ax.set_xticks(mid_x)
    ax.set_ylim(0, 7)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Speedup")
    _, caps, _ = ax.errorbar(mid_x, speedup, yerr=serr, **errorbar_style)
    paint_caps(caps)

    # PERFORMANCE
    ax = plt.subplot(4, 1, 4)
    ax.bar(bar_x, perf, width=width, color=sns.color_palette("Blues"))
    ax.set_xticks(mid_x)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Performance")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter(percent))
    ax.set_ylim(0, 100)
    _, caps, _ = ax.errorbar(mid_x, perf, yerr=perr, **errorbar_style)
    paint_caps(caps)

    # ACCURACY
    ax = plt.subplot(4, 1, 2)
    ax.bar(bar_x, correct, width=width, color=sns.color_palette("Reds"))
    ax.set_xticks(mid_x)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Accuracy")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter(percent))
    ax.set_ylim(0, 12)

    viz.finalise(output, **kwargs)
示例#47
0
def speedup_classification(db, output=None, job="xval", **kwargs):
    """
    Plot performance of classification using speedup regression.

    For every classifier with results for `job`, three adjacent bars are
    drawn (accuracy, speedup, performance); speedup and performance also
    get confidence-interval error bars. The legend is placed beneath the
    plot and the figure is passed to viz.finalise().

    Arguments:
        db: Provides execute(sql, params) returning an iterable cursor
            that also supports fetchone().
        output: Forwarded to viz.finalise().
        job: Bound to the ``job`` column of every query and used in the
            default title.
        **kwargs: ``title`` overrides the default title; the rest is
            forwarded to viz.finalise().
    """
    accuracy_sql = (
        "SELECT\n"
        "    (SUM(correct) / CAST(? AS FLOAT)) * 100\n"
        "FROM speedup_classification_results\n"
        "WHERE job=? AND classifier=?"
    )
    stats_sql = (
        "SELECT\n"
        "    AVG(speedup) * 100,\n"
        "    CONFERROR(speedup, .95) * 100,\n"
        "    AVG(performance) * 100,\n"
        "    CONFERROR(performance, .95) * 100\n"
        "FROM speedup_classification_results\n"
        "WHERE job=? AND classifier=?"
    )

    # Get a list of classifiers and result counts.
    classifier_counts = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM speedup_classification_results\n"
        "WHERE job=? GROUP BY classifier", (job,)
    )

    results = []
    for classifier, num_results in classifier_counts:
        short_name = ml.classifier_basename(classifier)
        accuracy = db.execute(
            accuracy_sql, (num_results, job, classifier)).fetchone()[0]
        # Mean speedup / performance and their confidence errors.
        stats = list(db.execute(stats_sql, (job, classifier)).fetchone())
        results.append([short_name, accuracy] + stats)

    # Zip into per-column tuples.
    labels, correct, speedups, yerrs, perfs, perf_yerrs = zip(*results)

    X = np.arange(len(labels))
    # Bar width.
    width = (.8 / (len(results[0]) - 1))

    bar_specs = (
        (1, correct, "Blues", "Accuracy"),
        (2, speedups, "Greens", "Speedup"),
        (3, perfs, "Oranges", "Performance"),
    )
    for slot, heights, palette_name, legend_label in bar_specs:
        plt.bar(X + slot * width, heights, width=width,
                color=sns.color_palette(palette_name, 1), label=legend_label)

    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    errorbar_specs = (
        (2.5, speedups, yerrs),
        (3.5, perfs, perf_yerrs),
    )
    for slot, heights, errors in errorbar_specs:
        _, cap_lines, _ = plt.errorbar(X + slot * width, heights, fmt="none",
                                       yerr=errors, capsize=3, ecolor="k")
        for cap_line in cap_lines:
            cap_line.set_color('k')
            cap_line.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, labels)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    default_title = ("Classification results for " + job +
                     " using speedup regression")
    plt.title(kwargs.pop("title", default_title))

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    legend = plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)
    viz.finalise(output, additional_artists=[legend], bbox_inches="tight",
                 **kwargs)
示例#48
0
def pie(data, output=None, **kwargs):
    """
    Draw a pie chart from an iterable of (label, value) pairs.

    Arguments:
        data: Iterable of two-element items; the first elements become
            the wedge labels and the second elements the wedge sizes.
        output: Forwarded to viz.finalise().
        **kwargs: Forwarded to viz.finalise().
    """
    wedge_labels, wedge_sizes = zip(*data)
    pie_options = dict(labels=wedge_labels, autopct='%1.1f%%', shadow=True,
                       startangle=90)
    plt.pie(wedge_sizes, **pie_options)
    viz.finalise(output, **kwargs)
示例#49
0
def plot_speedups_extended_model_2platform(platform_a, platform_b):
  """
  Plot speedup of extended model over Grewe et al for 2 platforms.

  Each platform argument is indexed as ``[0]`` (path of a CSV of benchmark
  observations) and ``[1]`` (path of a CSV of synthetic observations).
  Results for platform_a are labelled "AMD Tahiti 7970" and results for
  platform_b are labelled "NVIDIA GTX 970". Summary statistics are printed
  to stdout and the figure is handed to viz.finalise().

  Arguments:
    platform_a: (benchmarks_csv, synthetic_csv) for the first platform.
    platform_b: (benchmarks_csv, synthetic_csv) for the second platform.
  """
  # Load both data sets per platform; "synthetic" is 0 for benchmark
  # observations and 1 for synthetic ones.
  aB = pd.read_csv(platform_a[0])
  aB["synthetic"] = np.zeros(len(aB))
  bB = pd.read_csv(platform_b[0])
  bB["synthetic"] = np.zeros(len(bB))
  B = pd.concat((aB, bB))

  aS = pd.read_csv(platform_a[1])
  aS["synthetic"] = np.ones(len(aS))
  bS = pd.read_csv(platform_b[1])
  bS["synthetic"] = np.ones(len(bS))
  S = pd.concat((aS, bS))

  aBS = pd.concat((aB, aS))
  bBS = pd.concat((bB, bS))
  BS = pd.concat((B, S))

  assert (len(B) == len(aB) + len(bB))  # sanity checks
  assert (len(S) == len(aS) + len(bS))
  assert (len(BS) == len(aBS) + len(bBS))

  # get benchmark names: the leading <text>-<digits/dots>-<text> prefix of
  # every benchmark identifier, de-duplicated and sorted
  benchmark_names = sorted(set([
    re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)", b).group(1)
    for b in B["benchmark"]
  ]))

  # perform cross-validation
  B_out = []
  for i, benchmark in enumerate(benchmark_names):
    print("\ranalyzing", i + 1, benchmark, end="")
    cgo13_clf, our_clf = cgo13.model(), get_our_model()
    cgo13_features, our_features = get_cgo13_features, get_our_features

    # cross validate on Grewe et al. and our model
    tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                        aBS, aBS, benchmark)
    for d in tmp:
      d["platform"] = "AMD Tahiti 7970"
    B_out += tmp

    # reset models
    cgo13_clf, our_clf = cgo13.model(), get_our_model()

    # same as before, on other platform:
    tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                        bBS, bBS, benchmark)
    for d in tmp:
      d["platform"] = "NVIDIA GTX 970"
    B_out += tmp
  print()

  # create results frame
  R_out = []
  for row in B_out:
    # look up the runtimes recorded for the devices named by "p1" and "p2"
    p1_runtime = row["runtime_" + row["p1"].lower()]
    p2_runtime = row["runtime_" + row["p2"].lower()]

    # speedup is the runtime on the "p2" device divided by the runtime on
    # the "p1" device.
    # NOTE(review): which model produced "p1" and which produced "p2" is
    # decided inside _compare_clfs -- confirm the ratio direction there.
    row["p_speedup"] = p2_runtime / p1_runtime

    # get the benchmark name
    row["group"] = escape_benchmark_name(row["benchmark"])

    R_out.append(row)
  R = pd.DataFrame(R_out)

  a = R[R["platform"] == "AMD Tahiti 7970"]
  b = R[R["platform"] == "NVIDIA GTX 970"]

  a_speedups = a.groupby(["group"])["p_speedup"].mean()
  b_speedups = b.groupby(["group"])["p_speedup"].mean()

  a_speedup = labmath.mean(a_speedups)
  b_speedup = labmath.mean(b_speedups)

  assert (len(R) == len(a) + len(b))  # sanity-check

  print("  #. benchmarks:          ",
        len(set(B["benchmark"])), "kernels,", len(B), "observations")
  print("  #. synthetic:           ",
        len(set(S["benchmark"])), "kernels,", len(S), "observations")
  print()
  print("  Speedup on AMD:          {:.2f} x".format(a_speedup))
  print("  Speedup on NVIDIA:       {:.2f} x".format(b_speedup))

  palette = sns.cubehelix_palette(
      len(set(R["platform"])), start=4, rot=.8, light=.8, dark=.3)

  # Append the per-platform average bars. pd.concat is used because
  # DataFrame.append was removed in pandas 2.0.
  averages = pd.DataFrame([
    {
      "group": "Average",
      "p_speedup": a_speedup,
      "platform": "AMD Tahiti 7970"
    },
    {
      "group": "Average",
      "p_speedup": b_speedup,
      "platform": "NVIDIA GTX 970"
    },
  ])
  R = pd.concat((R, averages), ignore_index=True)

  R["p_speedup"] -= 1  # negative offset so that bars start at 1

  ax = sns.barplot(x="group", y="p_speedup", hue="platform", data=R,
                   palette=palette, ci=None)

  plt.ylabel("Speedup over Grewe et al.")
  plt.xlabel("")

  plt.axhline(y=0, color="k", lw=1)
  # dashed separator one unit left of the right-hand x limit
  plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1, linestyle="--")
  plt.ylim(-1, 9)
  plt.setp(ax.get_xticklabels(), rotation=90)  # rotate x ticks
  ax.get_legend().set_title("")  # legend
  plt.legend(loc='upper right')

  # counter negative offset
  ax.set_yticklabels([int(i) + 1 for i in ax.get_yticks()])

  ax.get_legend().draw_frame(True)

  viz.finalise(figsize=(9, 4), tight=True)
示例#50
0
def err_fn_performance(db, output=None, job="xval", **kwargs):
    """
    Plot performance and speedup of each error handler for a job.

    One query per entry of ``db.err_fns`` selects the geometric means and
    confidence errors of performance and speedup, restricted to rows with
    ``illegal=1 or refused=1``. Two bars with error bars are drawn per
    error handler, with the legend placed beneath the plot.

    Arguments:
        db: Provides ``err_fns`` and execute(sql, params) whose result
            has fetchone().
        output: Forwarded to viz.finalise().
        job: Bound to the ``job`` column and used in the default title.
        **kwargs: ``title`` overrides the default title; the rest is
            forwarded to viz.finalise().
    """
    handlers = db.err_fns
    sql = (
        "SELECT\n"
        "    GEOMEAN(performance) * 100,\n"
        "    CONFERROR(performance, .95) * 100,\n"
        "    GEOMEAN(speedup) * 100,\n"
        "    CONFERROR(speedup, .95) * 100\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND (illegal=1 or refused=1)"
    )

    results = []
    for handler in handlers:
        results.append(db.execute(sql, (job, handler)).fetchone())

    perfs, perf_errs, speedups, speedup_errs = zip(*results)

    X = np.arange(len(handlers))
    # Bar width.
    width = (.8 / (len(results[0]) - 1))

    # (bar position, error bar position, heights, errors, palette, label)
    series = (
        (X, X + .5 * width, perfs, perf_errs, "Reds", "Performance"),
        (X + width, X + 1.5 * width, speedups, speedup_errs,
         "Greens", "Speedup"),
    )
    for bar_at, err_at, heights, errors, palette_name, legend_label in series:
        plt.bar(bar_at,
                heights,
                width=width,
                color=sns.color_palette(palette_name, 1),
                label=legend_label)
        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, cap_lines, _ = plt.errorbar(err_at,
                                       heights,
                                       fmt="none",
                                       yerr=errors,
                                       capsize=3,
                                       ecolor="k")
        for cap_line in cap_lines:
            cap_line.set_color('k')
            cap_line.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, handlers)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    default_title = "Error handler performance for " + job
    plt.title(kwargs.pop("title", default_title))

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    legend = plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)
    viz.finalise(output,
                 additional_artists=[legend],
                 bbox_inches="tight",
                 **kwargs)
示例#51
0
def plot_speedups_extended_model(benchmarks_data, clgen_data):
  """
  Plots speedups of extended model over Grewe et al

  Both CSV files are loaded and concatenated, the two models are compared
  once per benchmark name via _compare_clfs(), and the mean speedup per
  benchmark group is drawn as a bar chart with a trailing "Average" bar.
  Summary statistics are printed to stdout.

  Arguments:
    benchmarks_data: Path of the CSV of benchmark observations.
    clgen_data: Path of the CSV of synthetic observations.

  Returns: speedup (labmath.mean of the per-group mean speedups)
  """
  # "synthetic" is 0 for benchmark observations and 1 for synthetic ones.
  B = pd.read_csv(benchmarks_data)
  B["synthetic"] = np.zeros(len(B))

  S = pd.read_csv(clgen_data)
  S["synthetic"] = np.ones(len(S))

  BS = pd.concat((B, S))

  assert (len(BS) == len(B) + len(S))

  # get benchmark names: the leading <text>-<digits/dots>-<text> prefix of
  # every benchmark identifier, de-duplicated and sorted
  benchmark_names = sorted(set([
    re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)", b).group(1)
    for b in B["benchmark"]
  ]))

  # perform cross-validation
  B_out = []
  for i, benchmark in enumerate(benchmark_names):
    print("\ranalyzing", i + 1, benchmark, end="")
    cgo13_clf, our_clf = cgo13.model(), get_our_model()
    cgo13_features, our_features = get_cgo13_features, get_our_features

    # cross validate on Grewe et al. and our model
    tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                        BS, BS, benchmark)
    B_out += tmp
  print()

  # create results frame
  R_out = []
  for row in B_out:
    # look up the runtimes recorded for the devices named by "p1" and "p2"
    p1_runtime = row["runtime_" + row["p1"].lower()]
    p2_runtime = row["runtime_" + row["p2"].lower()]

    # speedup is the runtime on the "p2" device divided by the runtime on
    # the "p1" device.
    # NOTE(review): which model produced "p1" and which produced "p2" is
    # decided inside _compare_clfs -- confirm the ratio direction there.
    row["p_speedup"] = p2_runtime / p1_runtime

    # get the benchmark name
    row["group"] = escape_benchmark_name(row["benchmark"])

    R_out.append(row)
  R = pd.DataFrame(R_out)

  speedups = R.groupby(["group"])["p_speedup"].mean()
  speedup = labmath.mean(speedups)

  print("  #. benchmarks:          ",
        len(set(B["benchmark"])), "kernels,", len(B), "observations")
  print("  #. synthetic:           ",
        len(set(S["benchmark"])), "kernels,", len(S), "observations")
  print()
  print("  Speedup:                 {:.2f} x".format(speedup))

  palette = sns.cubehelix_palette(1, start=4, rot=.8, light=.8, dark=.3)

  # Append the average bar. pd.concat is used because DataFrame.append was
  # removed in pandas 2.0.
  average = pd.DataFrame([{
    "group": "Average",
    "p_speedup": speedup
  }])
  R = pd.concat((R, average), ignore_index=True)

  R["p_speedup"] -= 1  # negative offset so that bars start at 1

  ax = sns.barplot(x="group", y="p_speedup", data=R,
                   palette=palette, ci=None)

  plt.ylabel("Speedup over Grewe et al.")
  plt.xlabel("")

  plt.axhline(y=0, color="k", lw=1)
  # dashed separator one unit left of the right-hand x limit
  plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1, linestyle="--")
  plt.ylim(-1, 9)
  plt.setp(ax.get_xticklabels(), rotation=90)  # rotate x ticks

  # counter negative offset
  ax.set_yticklabels([int(i) + 1 for i in ax.get_yticks()])

  viz.finalise(figsize=(7, 3.7), tight=True)
  return speedup
示例#52
0
 def test_finalise_tight(self):
     # Finalising a plot with tight=True must write the requested file;
     # the file is removed again afterwards.
     image_path = "/tmp/labm8.png"
     self._mkplot()
     viz.finalise(image_path, tight=True)
     was_written = fs.exists(image_path)
     self.assertTrue(was_written)
     fs.rm(image_path)
示例#53
0
def test_finalise():
  # Finalising a plot to a path must write that file; the file is removed
  # again afterwards.
  image_path = "/tmp/labm8.png"
  _MakeTestPlot()
  viz.finalise(image_path)
  was_written = fs.exists(image_path)
  assert was_written
  fs.rm(image_path)
示例#54
0
 def test_finalise_figsize(self):
     # Finalising a plot with an explicit figsize must write the
     # requested file; the file is removed again afterwards.
     image_path = "/tmp/labm8.png"
     dimensions = (10, 5)
     self._mkplot()
     viz.finalise(image_path, figsize=dimensions)
     was_written = fs.exists(image_path)
     self.assertTrue(was_written)
     fs.rm(image_path)
示例#55
0
def test_finalise_figsize():
  # Finalising a plot with an explicit figsize must write the requested
  # file; the file is removed again afterwards.
  image_path = "/tmp/labm8.png"
  dimensions = (10, 5)
  _MakeTestPlot()
  viz.finalise(image_path, figsize=dimensions)
  was_written = fs.exists(image_path)
  assert was_written
  fs.rm(image_path)
示例#56
0
def classification(db, output=None, job="xval", **kwargs):
    """
    Plot classification results for every classifier of a job.

    Four stacked bar charts are drawn, with one column per classifier
    plus a leading "ZeroR" baseline column: classification time, ratios
    of illegal / refused / accurate predictions, speedup per error
    handler and performance per error handler. The legend of the last
    chart is placed beneath the plot.

    Arguments:
        db: Provides ``err_fns`` (a non-empty sequence of error handler
            names) and execute(sql, params) returning an iterable cursor
            that also supports fetchone().
        output: Passed as the first argument to viz.finalise().
        job: Bound to the ``job`` column of every query and used in the
            default title.
        **kwargs: ``title`` overrides the default title; everything else
            is passed through to viz.finalise().
    """
    def plot_confidence(axes, x, heights, errors):
        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = axes.errorbar(x, heights, fmt="none", yerr=errors,
                                   capsize=3, ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)

    err_fns = db.err_fns
    base_err_fn = err_fns[0]
    num_err_fns = len(err_fns)

    # Layout of every row in `results`:
    #   [label, correct, illegal, refused, time, terr,
    #    <one (speedup, error) pair per err_fn>,
    #    <one (performance, error) pair per err_fn>]
    speedup_col = 6
    perf_col = speedup_col + num_err_fns

    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier",
        (job, base_err_fn)
    )
    results = []

    # Add baseline results.
    baseline = "4x4"
    # NOTE(review): the divisor 3 is hard-coded in the SQL; presumably it
    # is the number of error handlers -- confirm before changing.
    correct = db.execute("SELECT Count(*) * 1.0 / 3 FROM classification_results "
                         "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in
        db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?",
            (job, baseline)
        )
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    # The baseline uses the same speedup and performance pair for every
    # error handler, so repeat each pair once per err_fn.
    results.append(["ZeroR", correct, illegal, refused, time, terr]
                   + [speedup] * num_err_fns
                   + [perf] * num_err_fns)

    # Get results
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)
        ).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr]
                       + speedups + perfs)

    # Zip the scalar columns into lists.
    labels, correct, illegal, refused, time, terr = zip(*[
        (text.truncate(result[0], 40), result[1], result[2],
         result[3], result[4], result[5])
        for result in results
    ])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    plot_confidence(ax, X + .5, time, terr)

    # RATIOS (always three bars per classifier)
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, illegal, width=width,
           color=sns.color_palette("Reds", 1), label="Illegal")
    ax.bar(X + .1 + width, refused, width=width,
           color=sns.color_palette("Oranges", 1), label="Refused")
    ax.bar(X + .1 + 2 * width, correct, width=width,
           color=sns.color_palette("Blues", 1), label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)

    # SPEEDUPS (one bar per error handler and classifier)
    ax = plt.subplot(4, 1, 3)
    width = (.8 / num_err_fns)
    colors = sns.color_palette("Greens", num_err_fns)
    for i, err_fn in enumerate(err_fns):
        pairs = [result[speedup_col + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), speedups, width=width,
               label=errfn2label(err_fn), color=colors[i])
        plot_confidence(ax, X + .1 + (i + .5) * width, speedups, yerrs)
    # Ticks and labels are set exactly once. Passing the labels as the
    # second positional argument of set_xticks() is avoided because
    # Matplotlib < 3.5 interprets that argument as ``minor``.
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)

    # PERFORMANCE (one bar per error handler and classifier)
    colors = sns.color_palette("Blues", num_err_fns)
    width = (.8 / num_err_fns)
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(err_fns):
        pairs = [result[perf_col + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), perfs, width=width,
               label=errfn2label(err_fn), color=colors[i])
        plot_confidence(ax, X + .1 + (i + .5) * width, perfs, yerrs)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")

    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)