def runtimes_range(db, output=None, where=None, nbins=25, iqr=(0.25, 0.75),
                   **kwargs):
    data = [t[2:] for t in db.min_max_runtimes(where=where)]
    min_t, max_t = zip(*data)

    lower = labmath.filter_iqr(min_t, *iqr)
    upper = labmath.filter_iqr(max_t, *iqr)

    min_data = np.r_[lower, upper].min()
    max_data = np.r_[lower, upper].max()
    bins = np.linspace(min_data, max_data, nbins)

    plt.hist(lower, bins, label="Min")
    plt.hist(upper, bins, label="Max")

    title = kwargs.pop("title",
                       "Normalised distribution of min and max runtimes")
    plt.title(title)
    plt.ylabel("Frequency")
    plt.xlabel("Runtime (normalised to mean)")
    plt.legend(frameon=True)
    viz.finalise(output, **kwargs)

def err_fn_speedups(db, err_fn, output=None, sort=False, job="xval",
                    **kwargs):
    """
    Plot speedup over the baseline of all classifiers for an err_fn.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    for classifier in db.classification_classifiers:
        basename = ml.classifier_basename(classifier)
        performances = [
            row for row in db.execute(
                "SELECT speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn))
        ]
        if sort:
            performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=basename)

    plt.plot([1 for _ in performances], "-", label="ZeroR")

    title = kwargs.pop("title", err_fn)
    ax.set_yscale("log")
    plt.title(title)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Test instances")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)

def classifier_speedups(db, classifier, output=None, sort=False,
                        job="xval_classifiers", **kwargs):
    """
    Plot speedup over the baseline of a classifier for each err_fn.
    """
    for err_fn in db.err_fns:
        performances = [
            row for row in db.execute(
                "SELECT speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn))
        ]
        if sort:
            performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=err_fn)

    basename = ml.classifier_basename(classifier)
    plt.title(basename)
    plt.ylabel("Speedup")
    plt.xlabel("Test instances")
    plt.axhline(y=1, color="k")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)

def pie(data, output=None, **kwargs):
    labels, values = zip(*data)
    plt.pie(values, labels=labels, autopct='%1.1f%%', shadow=True,
            startangle=90)
    viz.finalise(output, **kwargs)

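# A minimal usage sketch for `pie` (the (label, value) pairs below are made
# up; real callers pass aggregate properties of the experiment database,
# e.g. `db.num_scenarios_by_device` as in main() below):
#
#   pie([("GPU", 64), ("CPU", 36)], output="devices.png")
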
def err_fn_performance(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    results = [
        db.execute(
            "SELECT\n"
            "    GEOMEAN(performance) * 100,\n"
            "    CONFERROR(performance, .95) * 100,\n"
            "    GEOMEAN(speedup) * 100,\n"
            "    CONFERROR(speedup, .95) * 100\n"
            "FROM classification_results\n"
            "WHERE job=? AND err_fn=? AND (illegal=1 OR refused=1)",
            (job, err_fn)).fetchone()
        for err_fn in err_fns
    ]
    perfs, perfErrors, speedups, speedupErrors = zip(*results)

    X = np.arange(len(err_fns))
    # Bar width.
    width = .8 / (len(results[0]) - 1)

    plt.bar(X, perfs, width=width,
            color=sns.color_palette("Reds", 1), label="Performance")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = plt.errorbar(X + .5 * width, perfs, fmt="none",
                              yerr=perfErrors, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    plt.bar(X + width, speedups, width=width,
            color=sns.color_palette("Greens", 1), label="Speedup")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = plt.errorbar(X + 1.5 * width, speedups, fmt="none",
                              yerr=speedupErrors, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, err_fns)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    title = kwargs.pop("title", "Error handler performance for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    #   http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight",
                 **kwargs)

def bar3d(self, output=None, title=None, figsize=(5, 4), zlabel=None,
          zticklabels=None, rotation=None, **kwargs):
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D  # noqa: registers '3d' projection

    X, Y, dZ = [], [], []
    # Iterate over every point in space.
    for j, i in product(range(self.matrix.shape[0]),
                        range(self.matrix.shape[1])):
        if self.matrix[j][i] > 0:
            X.append(i)
            Y.append(j)
            dZ.append(self.matrix[j][i])
    num_vals = len(X)
    Z = np.zeros((num_vals,))
    dX = np.ones((num_vals,))
    dY = np.ones((num_vals,))

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.bar3d(X, Y, Z, dX, dY, dZ, **kwargs)

    # Set X axis labels
    ax.set_xticks(np.arange(len(self.c)))
    ax.set_xticklabels(self.c)
    ax.set_xlabel("Columns")

    # Set Y axis labels
    ax.set_yticks(np.arange(len(self.r)))
    ax.set_yticklabels(self.r)
    ax.set_ylabel("Rows")

    # Set Z axis labels
    if zlabel is not None:
        ax.set_zlabel(zlabel)
    if zticklabels is not None:
        ax.set_zticks(np.arange(len(zticklabels)))
        ax.set_zticklabels(zticklabels)

    # Set plot rotation.
    if rotation is not None:
        ax.view_init(azim=rotation)
    # Set plot title.
    if title:
        plt.title(title)
    plt.tight_layout()
    plt.gcf().set_size_inches(*figsize, dpi=300)
    viz.finalise(output)

def _performance_plot(output, labels, values, title, color=None, **kwargs):
    fig = plt.figure()
    ax = fig.add_subplot(111)

    sns.boxplot(data=values, linewidth=1, fliersize=1)
    # sns.violinplot(data=values, inner="quartile", linewidth=.5)

    ax.set_xticklabels(labels, rotation=90)
    plt.ylim(ymin=0, ymax=1)
    plt.ylabel("Performance")
    plt.title(title)
    viz.finalise(output, **kwargs)

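# A minimal usage sketch for `_performance_plot` (hypothetical data; each
# element of `values` holds the performance ratios, in [0, 1], for one box
# on the x axis):
#
#   _performance_plot("perf.png", ["kernel A", "kernel B"],
#                     [np.random.uniform(size=50),
#                      np.random.uniform(size=50)],
#                     "Synthetic example")
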
def runtimes_histogram(runtimes, output=None, color=None, **kwargs):
    mean = np.mean(runtimes)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    sns.distplot(runtimes, bins=40, kde_kws={"bw": .3}, color=color)
    ax.axvline(mean, color='0.25', linestyle='--')

    plt.xlim(min(runtimes), max(runtimes))
    plt.gca().axes.get_yaxis().set_ticks([])
    plt.xlabel("Runtime (ms)")
    plt.locator_params(axis="x", nbins=6)
    viz.finalise(output, **kwargs)

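# A minimal usage sketch for `runtimes_histogram` (synthetic, log-normally
# distributed runtimes; any flat sequence of runtimes in ms works):
#
#   runtimes = np.random.lognormal(mean=2, sigma=.25, size=1000)
#   runtimes_histogram(runtimes, output="runtimes.png")
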
def confinterval_trend(sample_counts, confintervals, output=None, vlines=[],
                       **kwargs):
    fig = plt.figure()
    ax = fig.add_subplot(111)

    plt.plot(sample_counts, [y * 100 for y in confintervals])
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    for vline in vlines:
        ax.axvline(vline, color='k', linestyle='--')

    plt.ylabel("95\\% CI / mean")
    plt.xlabel("Number of samples")
    plt.xlim(min(sample_counts), max(sample_counts))
    viz.finalise(output, **kwargs)

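# A minimal usage sketch for `confinterval_trend` (hypothetical values; each
# confidence interval is expressed as a fraction of the mean, so .25 is
# plotted as 25%):
#
#   confinterval_trend([5, 10, 25, 50, 100],
#                      [.25, .18, .11, .08, .05],
#                      output="ci_trend.png", vlines=[30])
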
def oracle_speedups(db, output=None, **kwargs):
    data = db.oracle_speedups().values()
    # Speedups = sorted(data, reverse=True)
    Speedups = data
    X = np.arange(len(Speedups))

    plt.plot(X, Speedups)
    plt.xlim(0, len(X) - 1)

    title = kwargs.pop("title", "Attainable performance over baseline")
    plt.title(title)
    plt.xlabel("Scenarios")
    plt.ylabel("Speedup")
    viz.finalise(output, **kwargs)

def runtimes_variance(db, output=None, min_samples=1, where=None, **kwargs):
    # Create temporary table of scenarios and params to use, ignoring
    # those with less than "min_samples" samples.
    if "_temp" in db.tables:
        db.drop_table("_temp")

    db.execute("CREATE TABLE _temp (\n"
               "    scenario TEXT,\n"
               "    params TEXT,\n"
               "    PRIMARY KEY (scenario,params)\n"
               ")")
    query = ("INSERT INTO _temp\n"
             "SELECT\n"
             "    scenario,\n"
             "    params\n"
             "FROM runtime_stats\n"
             "WHERE num_samples >= ?")
    if where is not None:
        query += " AND " + where
    db.execute(query, (min_samples,))

    X, Y = zip(*sorted([
        row for row in db.execute(
            "SELECT\n"
            "    AVG(runtime),\n"
            "    CONFERROR(runtime, .95) / AVG(runtime)\n"
            "FROM _temp\n"
            "LEFT JOIN runtimes\n"
            "    ON _temp.scenario=runtimes.scenario\n"
            "    AND _temp.params=runtimes.params\n"
            "GROUP BY _temp.scenario,_temp.params")
    ], key=lambda x: x[0]))
    db.execute("DROP TABLE _temp")

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(X, Y)
    ax.set_xscale("log")

    title = kwargs.pop("title",
                       "Runtime variance as a function of mean runtime")
    plt.title(title)
    plt.ylabel("Normalised confidence interval")
    plt.xlabel("Runtime (ms)")
    plt.xlim(0, X[-1])
    plt.ylim(ymin=0)
    viz.finalise(output, **kwargs)

def performance_vs_coverage(db, output=None, max_values=250, **kwargs):
    data = [
        row for row in db.execute(
            "SELECT "
            "    performance AS performance, "
            "    coverage "
            "FROM param_stats")
    ]
    frame = pandas.DataFrame(data, columns=("Performance", "Legality"))
    sns.jointplot("Legality", "Performance", data=frame,
                  xlim=(0, 1), ylim=(0, 1))
    viz.finalise(output, **kwargs)

def refused_params_by_vendor(db, output=None, **kwargs):
    data = [
        row for row in db.execute(
            "SELECT devices.vendor, "
            "       ratio_refused "
            "FROM devices LEFT JOIN ("
            "    SELECT "
            "        devices.vendor AS opencl, "
            "        (Count(*) * 1.0 / ( "
            "            SELECT Count(*) "
            "            FROM runtime_stats "
            "            LEFT JOIN scenarios "
            "                ON runtime_stats.scenario=scenarios.id "
            "            LEFT JOIN devices AS dev "
            "                ON scenarios.device=dev.id "
            "            WHERE dev.vendor=devices.vendor "
            "        )) * 100 AS ratio_refused "
            "    FROM refused_params "
            "    LEFT JOIN scenarios "
            "        ON refused_params.scenario=scenarios.id "
            "    LEFT JOIN devices "
            "        ON scenarios.device=devices.id "
            "    GROUP BY devices.vendor COLLATE NOCASE ) "
            "ON devices.vendor LIKE opencl "
            "GROUP BY devices.vendor COLLATE NOCASE "
            "ORDER BY ratio_refused DESC")
    ]
    labels, Y = zip(*data)
    Y = [0 if not y else y for y in Y]
    X = np.arange(len(Y))

    fig, ax = plt.subplots()
    ax.bar(X + .1, Y, width=.8)
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation=90)
    ax.set_ylabel("Ratio refused (\\%)")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    for tick in ax.xaxis.get_minor_ticks():
        tick.tick1line.set_markersize(0)
        tick.tick2line.set_markersize(0)
        tick.label1.set_horizontalalignment('center')

    viz.finalise(output, **kwargs)
    return data

def heatmap(self, output=None, title=None, figsize=(5, 4),
            xlabels=True, ylabels=True, cbar=True, **kwargs):
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Reverse the row order so that the origin is in the bottom left.
    new_order = list(reversed(range(self.matrix.shape[0])))
    data = self.matrix[:][new_order]

    if "square" not in kwargs:
        kwargs["square"] = True

    if xlabels is True:
        xticklabels = ["" if x % 20 else str(x) for x in self.c]
    else:
        xticklabels = xlabels
    if ylabels is True:
        yticklabels = ["" if x % 20 else str(x)
                       for x in list(reversed(self.r))]
    else:
        yticklabels = ylabels

    _, ax = plt.subplots(figsize=figsize)
    sns.heatmap(data, xticklabels=xticklabels, yticklabels=yticklabels,
                cbar=cbar, **kwargs)

    # Set labels.
    ax.set_ylabel("Rows")
    ax.set_xlabel("Columns")

    if title:
        plt.title(title)
    plt.tight_layout()
    plt.gcf().set_size_inches(*figsize, dpi=300)
    viz.finalise(output)

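# A minimal usage sketch for `heatmap` (assumes the enclosing
# ParamSpace-style class: `matrix` is a 2D numpy array indexed
# [row][column], and `r`/`c` are its numeric row/column labels; the
# instance construction mirrors main() below):
#
#   space = _space.ParamSpace.from_dict(db.perf_scenario(scenario))
#   space.heatmap(output="space.png", title="Workgroup size performance")
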
def max_speedups(db, output=None, **kwargs):
    max_speedups, min_static, he = zip(*db.max_and_static_speedups)
    X = np.arange(len(max_speedups))

    fig = plt.figure()
    ax = fig.add_subplot(111)

    ax.plot(X, max_speedups, "r", linestyle="--", label="Max")
    ax.plot(X, min_static, label="$w_{(4 \\times 4)}$")
    ax.plot(X, he, linestyle="-", label="$w_{(32 \\times 4)}$")

    # plt.ylim(ymin=0, ymax=100)
    plt.xlim(xmin=0, xmax=len(X) - 1)

    title = kwargs.pop("title", "Max attainable speedups")
    plt.title(title)
    ax.set_yscale("log")
    plt.legend(frameon=True)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Scenarios (sorted by descending max speedup)")
    viz.finalise(output, **kwargs)

def performance_vs_max_wgsize(ratios, output=None, color=None, **kwargs):
    title = kwargs.pop(
        "title", "Workgroup size performance vs. maximum workgroup size")

    fig = plt.figure()
    ax = fig.add_subplot(111)

    sns.boxplot(data=ratios, linewidth=1, fliersize=1)
    # sns.violinplot(data=ratios, inner="quartile", linewidth=.5)

    multiplier = kwargs.pop("multiplier", 10)
    ax.set_xticklabels([str((x + 1) * multiplier) + r'\%'
                        for x in np.arange(len(ratios))])
    plt.title(title)
    plt.ylim(ymin=0, ymax=1)
    plt.ylabel("Performance")

    xlabel = kwargs.pop("xlabel", "")
    plt.xlabel(xlabel)
    viz.finalise(output, **kwargs)

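# A minimal usage sketch for `performance_vs_max_wgsize` (synthetic data;
# each element of `ratios` holds the performance values for one decile of
# the maximum workgroup size, hence the default `multiplier` of 10):
#
#   ratios = [np.random.uniform(size=100) for _ in range(10)]
#   performance_vs_max_wgsize(ratios, output="wgsize.png")
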
def num_params_vs_accuracy(db, output=None, where=None, **kwargs):
    freqs = sorted(db.oracle_param_frequencies(normalise=True).values(),
                   reverse=True)
    acc = 0
    Data = [0] * len(freqs)
    for i, freq in enumerate(freqs):
        acc += freq * 100
        Data[i] = acc

    X = np.arange(len(Data))
    ax = plt.subplot(111)
    ax.plot(X, Data)

    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    plt.xlim(xmin=0, xmax=len(X) - 1)
    plt.ylim(ymin=0, ymax=100)

    title = kwargs.pop("title",
                       "Number of workgroup sizes vs. oracle accuracy")
    plt.title(title)
    plt.ylabel("Accuracy")
    plt.xlabel("Number of distinct workgroup sizes")
    plt.legend(frameon=True)
    viz.finalise(output, **kwargs)

def refused_params_by_device(db, output=None, **kwargs):
    data = [
        (fmtdevid(row[0]), round(row[1], 2)) for row in db.execute(
            "SELECT "
            "    devices.id AS device, "
            "    (Count(*) * 1.0 / ( "
            "        SELECT Count(*) "
            "        FROM runtime_stats "
            "        LEFT JOIN scenarios "
            "            ON runtime_stats.scenario=scenarios.id "
            "        WHERE scenarios.device=devices.id "
            "    )) * 100 AS ratio_refused "
            "FROM refused_params "
            "LEFT JOIN scenarios "
            "    ON refused_params.scenario=scenarios.id "
            "LEFT JOIN devices "
            "    ON scenarios.device=devices.id "
            "GROUP BY devices.id "
            "ORDER BY ratio_refused DESC")
    ]
    labels, Y = zip(*data)
    X = np.arange(len(Y))

    fig, ax = plt.subplots()
    ax.bar(X + .1, Y, width=.8)
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation=90)
    ax.set_ylabel("Ratio refused (\\%)")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    for tick in ax.xaxis.get_minor_ticks():
        tick.tick1line.set_markersize(0)
        tick.tick2line.set_markersize(0)
        tick.label1.set_horizontalalignment('center')

    viz.finalise(output, **kwargs)

def runtime_regression(db, output=None, job="xval", **kwargs):
    """
    Plot accuracy of a classifier at predicted runtime.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = sns.color_palette()

    i, actual = 0, []
    for i, classifier in enumerate(db.regression_classifiers):
        basename = ml.classifier_basename(classifier)
        actual, predicted = zip(*sorted([
            row for row in db.execute(
                "SELECT\n"
                "    actual,\n"
                "    predicted\n"
                "FROM runtime_regression_results\n"
                "WHERE job=? AND classifier=?",
                (job, classifier))
        ], key=lambda x: x[0], reverse=True))

        if basename == "ZeroR":
            ax.plot(predicted, label=basename, color=colors[i - 1])
        else:
            ax.scatter(np.arange(len(predicted)), predicted,
                       label=basename, color=colors[i - 1])

    ax.plot(actual, label="Actual", color=colors[i])
    ax.set_yscale("log")

    plt.xlim(0, len(actual))
    plt.legend()

    title = kwargs.pop("title", "Runtime regression for " + job)
    plt.title(title)
    plt.xlabel("Test instances (sorted by descending runtime)")
    plt.ylabel("Runtime (ms, log)")
    viz.finalise(output, **kwargs)

def num_params(db, output=None, sample_range=None, **kwargs):
    # Range of param counts.
    sample_range = sample_range or (1, 100)

    num_instances = db.num_rows("scenario_stats")
    X = np.arange(num_instances)
    Y = np.zeros(num_instances)
    for i in range(sample_range[0], sample_range[1] + 1):
        Y[i] = db.execute(
            "SELECT (Count(*) * 1.0 / ?) * 100 "
            "FROM scenario_stats WHERE num_params >= ?",
            (num_instances, i)).fetchone()[0]

    title = kwargs.pop("title", "Parameter values count")
    plt.title(title)
    plt.xlabel("Number of parameters")
    plt.ylabel("Ratio of scenarios")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    plt.plot(X, Y)
    plt.xlim(*sample_range)
    viz.finalise(output, **kwargs)

def plot_speedups_with_clgen(benchmarks_data, clgen_data, suite="npb"):
    """
    Plot speedups of predictive models trained with and without clgen.

    Returns speedups (without and with).
    """
    # datasets: B - benchmarks, S - synthetics, BS - benchmarks + synthetics:
    B = pd.read_csv(benchmarks_data)
    B["group"] = ["B"] * len(B)
    S = pd.read_csv(clgen_data)
    S["group"] = ["S"] * len(S)
    BS = pd.concat((B, S))

    # find the ZeroR. This is the device which is most frequently optimal
    Bmask = B[B["benchmark"].str.contains(suite)]
    zeror = Counter(Bmask["oracle"]).most_common(1)[0][0]
    zeror_runtime = "runtime_" + zeror.lower()

    # get the names of the benchmarks, in the form: $suite-$version-$benchmark
    benchmark_names = sorted(set([
        re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)-", b).group(1)
        for b in B["benchmark"] if b.startswith(suite)
    ]))

    B_out, BS_out = [], []
    for benchmark in benchmark_names:
        clf = cgo13.model()
        features = get_cgo13_features
        # cross validate on baseline
        B_out += cgo13.leave_one_benchmark_out(clf, features, B, benchmark)
        # reset model
        clf = cgo13.model()
        # repeat cross-validation with synthetic kernels
        BS_out += cgo13.leave_one_benchmark_out(clf, features, BS, benchmark)

    # create results frame
    R_out = []
    for b, bs in zip(B_out, BS_out):
        # get runtimes of device using predicted device
        b_p_runtime = b["runtime_" + b["p"].lower()]
        bs_p_runtime = bs["runtime_" + bs["p"].lower()]

        # speedup is the ratio of runtime using the predicted device
        # over runtime using ZeroR device
        b["p_speedup"] = b_p_runtime / b[zeror_runtime]
        bs["p_speedup"] = bs_p_runtime / bs[zeror_runtime]

        if "training" in benchmarks_data:
            # $benchmark
            group = escape_benchmark_name(b["benchmark"])
        else:
            # $benchmark.$dataset
            group = re.sub(r"[^-]+-[0-9\.]+-([^-]+)-.+", r"\1",
                           b["benchmark"]) + "." + b["dataset"]
        b["group"] = group
        bs["group"] = group

        # set the training data type
        b["training"] = "Grewe et al."
        bs["training"] = "w. CLgen"
        R_out.append(b)
        R_out.append(bs)

    R = pd.DataFrame(R_out)

    b_mask = R["training"] == "Grewe et al."
    bs_mask = R["training"] == "w. CLgen"
    B_speedup = labmath.mean(R[b_mask].groupby(["group"])["p_speedup"].mean())
    BS_speedup = labmath.mean(
        R[bs_mask].groupby(["group"])["p_speedup"].mean())

    print("  #. benchmarks:  ", len(set(B["benchmark"])), "kernels,",
          len(B), "observations")
    print("  #. synthetic:   ", len(set(S["benchmark"])), "kernels,",
          len(S), "observations")
    print()
    print("  ZeroR device: {}".format(zeror))
    print()
    print("  Speedup of Grewe et al.: {:.2f} x".format(B_speedup))
    print("  Speedup w. CLgen:        {:.2f} x".format(BS_speedup))

    # average bars
    R = R.append({
        "group": "Average",
        "p_speedup": B_speedup,
        "training": "Grewe et al."
    }, ignore_index=True)
    R = R.append({
        "group": "Average",
        "p_speedup": BS_speedup,
        "training": "w. CLgen"
    }, ignore_index=True)

    R["p_speedup"] -= 1  # negative offset so that bars start at 1

    # colors
    palette = sns.cubehelix_palette(len(set(R["training"])),
                                    rot=-.4, light=.85, dark=.35)

    ax = sns.barplot(x="group", y="p_speedup", data=R, ci=None,
                     hue="training", palette=palette)
    plt.ylabel("Speedup")
    plt.xlabel("")

    plt.axhline(y=0, color="k", lw=1)  # speedup line
    plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1,
                linestyle="--")  # average line

    ax.get_legend().set_title("")  # no legend title
    plt.legend(loc='upper right')
    ax.get_legend().draw_frame(True)

    # plot shape and size
    figsize = (9, 2.2)
    if "nvidia" in benchmarks_data:
        typecast = int
        plt.ylim(-1, 16)
    elif "training" in benchmarks_data:
        typecast = float
        figsize = (7, 3.2)
    else:
        typecast = float

    # counter negative offset:
    ax.set_yticklabels([typecast(i) + 1 for i in ax.get_yticks()])

    plt.setp(ax.get_xticklabels(), rotation=90)

    viz.finalise(figsize=figsize, tight=True)
    return B_speedup, BS_speedup

def trisurf(self, output=None, title=None, figsize=(5, 4), zlabel=None,
            zticklabels=None, rotation=None, **kwargs):
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm
    from mpl_toolkits.mplot3d import Axes3D  # noqa: registers '3d' projection

    num_vals = self.matrix.shape[0] * self.matrix.shape[1]
    if num_vals < 3:
        io.error("Cannot create trisurf of", num_vals, "values")
        return

    X = np.zeros((num_vals,))
    Y = np.zeros((num_vals,))
    Z = np.zeros((num_vals,))

    # Iterate over every point in space.
    for j, i in product(range(self.matrix.shape[0]),
                        range(self.matrix.shape[1])):
        # Convert point to list index.
        index = j * self.matrix.shape[1] + i
        X[index] = i
        Y[index] = j
        Z[index] = self.matrix[j][i]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_trisurf(X, Y, Z, cmap=cm.jet, **kwargs)

    # Set X axis labels
    xticks = []
    xticklabels = []
    for i, c in enumerate(self.c):
        if not len(xticks) or c % 20 == 0:
            xticks.append(i)
            xticklabels.append(c)
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticklabels)
    ax.set_xlabel("$w_c$")

    # Set Y axis labels
    yticks = []
    yticklabels = []
    for i, r in enumerate(self.r):
        if not len(yticks) or r % 20 == 0:
            yticks.append(i)
            yticklabels.append(r)
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)
    ax.set_ylabel("$w_r$")

    # Set Z axis labels
    if zlabel is not None:
        ax.set_zlabel(zlabel)
    if zticklabels is not None:
        ax.set_zticks(np.arange(len(zticklabels)))
        ax.set_zticklabels(zticklabels)

    # Set plot rotation.
    if rotation is not None:
        ax.view_init(azim=rotation)
    # Set plot title.
    if title:
        plt.title(title)
    plt.tight_layout()
    plt.gcf().set_size_inches(*figsize, dpi=300)
    viz.finalise(output)

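# A minimal usage sketch for `trisurf` (same ParamSpace-style instance as
# `heatmap` and `bar3d` above; mirrors how main() below renders the
# per-scenario surface plots):
#
#   space.trisurf(output="surface.png", title="Scenario",
#                 zlabel="Performance", rotation=45)
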
def speedup_classification(db, output=None, job="xval", **kwargs):
    """
    Plot performance of classification using speedup regression.
    """
    # Get a list of classifiers and result counts.
    query = db.execute("SELECT classifier,Count(*) AS count\n"
                       "FROM speedup_classification_results\n"
                       "WHERE job=? GROUP BY classifier", (job,))

    results = []
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100\n"
            "FROM speedup_classification_results\n"
            "WHERE job=? AND classifier=?",
            (count, job, classifier)).fetchone()[0]
        # Get a list of mean speedups for each err_fn.
        speedups = [
            row for row in db.execute(
                "SELECT\n"
                "    AVG(speedup) * 100,\n"
                "    CONFERROR(speedup, .95) * 100,\n"
                "    AVG(performance) * 100,\n"
                "    CONFERROR(performance, .95) * 100\n"
                "FROM speedup_classification_results\n"
                "WHERE job=? AND classifier=?",
                (job, classifier)).fetchone()
        ]
        results.append([basename, correct] + speedups)

    # Zip into lists.
    labels, correct, speedups, yerrs, perfs, perf_yerrs = zip(*results)
    X = np.arange(len(labels))

    # Bar width.
    width = .8 / (len(results[0]) - 1)

    plt.bar(X + width, correct, width=width,
            color=sns.color_palette("Blues", 1), label="Accuracy")
    plt.bar(X + 2 * width, speedups, width=width,
            color=sns.color_palette("Greens", 1), label="Speedup")
    plt.bar(X + 3 * width, perfs, width=width,
            color=sns.color_palette("Oranges", 1), label="Performance")

    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = plt.errorbar(X + 2.5 * width, speedups, fmt="none",
                              yerr=yerrs, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)
    _, caps, _ = plt.errorbar(X + 3.5 * width, perfs, fmt="none",
                              yerr=perf_yerrs, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, labels)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    title = kwargs.pop(
        "title",
        "Classification results for " + job + " using speedup regression")
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    #   http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight",
                 **kwargs)

def test_finalise_tight():
    _MakeTestPlot()
    viz.finalise("/tmp/labm8.png", tight=True)
    assert fs.exists("/tmp/labm8.png")
    fs.rm("/tmp/labm8.png")

def main():
    db = _db.Database(experiment.ORACLE_PATH)
    ml.start()

    # Delete any old stuff.
    fs.rm(experiment.IMG_ROOT + "/*")
    fs.rm(experiment.TAB_ROOT + "/*")

    # Make directories
    fs.mkdir(experiment.TAB_ROOT)
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/bars"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/heatmap"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "scenarios/trisurf"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "coverage/datasets"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "safety/datasets"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/devices"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/kernels"))
    fs.mkdir(fs.path(experiment.IMG_ROOT, "oracle/datasets"))

    visualise.pie(db.num_scenarios_by_device,
                  fs.path(experiment.IMG_ROOT, "num_scenarios_by_device"))
    visualise.pie(db.num_runtime_stats_by_device,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_device"))
    visualise.pie(db.num_scenarios_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_scenarios_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_dataset,
                  fs.path(experiment.IMG_ROOT,
                          "num_runtime_stats_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_kernel,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_kernel"))

    # Per-scenario plots
    for row in db.scenario_properties:
        (scenario, device, kernel, north, south, east, west,
         max_wgsize, width, height, tout) = row
        title = ("{device}: {kernel}[{n},{s},{e},{w}]\n"
                 "{width} x {height} {type}s"
                 .format(device=text.truncate(device, 18), kernel=kernel,
                         n=north, s=south, e=east, w=west,
                         width=width, height=height, type=tout))
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/heatmap/{id}.png".format(id=scenario))
        space = _space.ParamSpace.from_dict(db.perf_scenario(scenario))
        max_c = min(25, len(space.c))
        max_r = min(25, len(space.r))
        space.reshape(max_c=max_c, max_r=max_r)

        # Heatmaps.
        mask = _space.ParamSpace(space.c, space.r)
        for j in range(len(mask.r)):
            for i in range(len(mask.c)):
                if space.matrix[j][i] == 0:
                    r, c = space.r[j], space.c[i]
                    # TODO: Get values from refused_params table.
                    if r * c >= max_wgsize:
                        # Illegal
                        mask.matrix[j][i] = -1
                    else:
                        # Refused
                        db.execute("INSERT OR IGNORE INTO refused_params "
                                   "VALUES(?,?)",
                                   (scenario, hash_params(c, r)))
                        space.matrix[j][i] = -1
                        mask.matrix[j][i] = 1
        db.commit()

        new_order = list(reversed(range(space.matrix.shape[0])))
        data = space.matrix[:][new_order]

        figsize = (12, 6)
        _, ax = plt.subplots(1, 2, figsize=figsize, sharey=True)
        sns.heatmap(data, ax=ax[0], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)
        ax[0].set_title(title)

        new_order = list(reversed(range(mask.matrix.shape[0])))
        data = mask.matrix[:][new_order]
        sns.heatmap(data, ax=ax[1], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        # Set labels.
        ax[0].set_ylabel("Rows")
        ax[0].set_xlabel("Columns")
        ax[1].set_ylabel("Rows")
        ax[1].set_xlabel("Columns")

        # plt.tight_layout()
        # plt.gcf().set_size_inches(*figsize, dpi=300)
        viz.finalise(output)

        # 3D bars.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/bars/{id}.png".format(id=scenario))
        space.bar3d(output=output, title=title, zlabel="Performance",
                    rotation=45)

        # Trisurfs.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/trisurf/{id}.png".format(id=scenario))
        space.trisurf(output=output, title=title, zlabel="Performance",
                      rotation=45)

    #####################
    # ML Visualisations #
    #####################
    # features_tab(db, experiment.TAB_ROOT)

    visualise_classification_job(db, "xval")
    visualise_classification_job(db, "arch")
    visualise_classification_job(db, "xval_real")
    visualise_classification_job(db, "synthetic_real")

    # Runtime regression accuracy.
    visualise_regression_job(db, "xval")
    visualise_regression_job(db, "arch")
    visualise_regression_job(db, "xval_real")
    visualise_regression_job(db, "synthetic_real")

    # Whole-dataset plots
    visualise.runtimes_variance(db, fs.path(experiment.IMG_ROOT,
                                            "runtime_variance.png"),
                                min_samples=30)
    visualise.num_samples(db, fs.path(experiment.IMG_ROOT,
                                      "num_samples.png"))
    visualise.runtimes_range(db, fs.path(experiment.IMG_ROOT,
                                         "runtimes_range.png"))
    visualise.max_speedups(db, fs.path(experiment.IMG_ROOT,
                                       "max_speedups.png"))
    visualise.kernel_performance(db, fs.path(experiment.IMG_ROOT,
                                             "kernel_performance.png"))
    visualise.device_performance(db, fs.path(experiment.IMG_ROOT,
                                             "device_performance.png"))
    visualise.dataset_performance(db, fs.path(experiment.IMG_ROOT,
                                              "dataset_performance.png"))
    visualise.num_params_vs_accuracy(db,
                                     fs.path(experiment.IMG_ROOT,
                                             "num_params_vs_accuracy.png"))
    visualise.performance_vs_coverage(db,
                                      fs.path(experiment.IMG_ROOT,
                                              "performance_vs_coverage.png"))
    visualise.performance_vs_max_wgsize(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_max_wgsize.png"))
    visualise.performance_vs_wgsize(db,
                                    fs.path(experiment.IMG_ROOT,
                                            "performance_vs_wgsize.png"))
    visualise.performance_vs_wg_c(db, fs.path(experiment.IMG_ROOT,
                                              "performance_vs_wg_c.png"))
    visualise.performance_vs_wg_r(db, fs.path(experiment.IMG_ROOT,
                                              "performance_vs_wg_r.png"))
    visualise.max_wgsizes(db, fs.path(experiment.IMG_ROOT,
                                      "max_wgsizes.png"))
    visualise.oracle_speedups(db, fs.path(experiment.IMG_ROOT,
                                          "oracle_speedups.png"))

    visualise.coverage(db, fs.path(experiment.IMG_ROOT,
                                   "coverage/coverage.png"))
    visualise.safety(db, fs.path(experiment.IMG_ROOT, "safety/safety.png"))
    visualise.oracle_wgsizes(db, fs.path(experiment.IMG_ROOT,
                                         "oracle/all.png"))

    # Per-device plots
    for i, device in enumerate(db.devices):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE device='{0}')"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}.png".format(i))
        visualise.coverage(db, output=output, where=where, title=device)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=device)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where, title=device)

        where = ("scenario IN (\n"
                 "    SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 "    SELECT id FROM scenarios WHERE kernel IN (\n"
                 "        SELECT id FROM kernel_names WHERE synthetic=0\n"
                 "    )\n"
                 ")".format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}_real.png".format(i))
        visualise.coverage(db, output=output, where=where,
                           title=device + ", real")
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}_real.png".format(i))
        visualise.safety(db, output, where=where, title=device + ", real")
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}_real.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where,
                                 title=device + ", real")

        where = ("scenario IN (\n"
                 "    SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 "    SELECT id FROM scenarios WHERE kernel IN (\n"
                 "        SELECT id FROM kernel_names WHERE synthetic=1\n"
                 "    )\n"
                 ")".format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}_synthetic.png".format(i))
        visualise.coverage(db, output=output, where=where,
                           title=device + ", synthetic")
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}_synthetic.png".format(i))
        visualise.safety(db, output, where=where,
                         title=device + ", synthetic")
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}_synthetic.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where,
                                 title=device + ", synthetic")

    # Per-kernel plots
    for kernel, ids in db.lookup_named_kernels().items():
        id_wrapped = ['"' + id + '"' for id in ids]
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE kernel IN ({0}))"
                 .format(",".join(id_wrapped)))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/kernels/{0}.png".format(kernel))
        visualise.coverage(db, output=output, where=where, title=kernel)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/kernels/{0}.png".format(kernel))
        visualise.safety(db, output=output, where=where, title=kernel)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/kernels/{0}.png".format(kernel))
        visualise.oracle_wgsizes(db, output=output, where=where, title=kernel)

    # Per-dataset plots
    for i, dataset in enumerate(db.datasets):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE dataset='{0}')"
                 .format(dataset))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/datasets/{0}.png".format(i))
        visualise.coverage(db, output, where=where, title=dataset)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/datasets/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=dataset)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/datasets/{0}.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where, title=dataset)

    ml.stop()

def classification(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    base_err_fn = err_fns[0]
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND "
        "classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier", (job, base_err_fn))

    results = []

    # Add baseline results.
    baseline = "4x4"
    correct = db.execute(
        "SELECT Count(*) * 1.0 / 3 FROM classification_results "
        "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in db.execute(
            "SELECT "
            "    DISTINCT runtime_stats.scenario, "
            "    (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "    ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "    ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? AND runtime_stats.params=?",
            (job, baseline))
    ]
    perf = (labmath.mean(perfs),
            labmath.confinterval(perfs, error_only=True))
    results.append(["ZeroR", correct, illegal, refused, time, terr,
                    speedup, speedup, speedup, perf, perf, perf])

    # Get results
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone()
            for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone()
            for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] +
                       speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(
        *[(text.truncate(result[0], 40), result[1], result[2],
           result[3], result[4], result[5]) for result in results])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5, time, fmt="none", yerr=terr,
                             capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = .8 / 3
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, illegal, width=width,
           color=sns.color_palette("Reds", 1), label="Illegal")
    ax.bar(X + .1 + width, refused, width=width,
           color=sns.color_palette("Oranges", 1), label="Refused")
    ax.bar(X + .1 + 2 * width, correct, width=width,
           color=sns.color_palette("Blues", 1), label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = .8 / 3
    colors = sns.color_palette("Greens", len(err_fns))
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), speedups, width=width,
               label=errfn2label(err_fn), color=colors[i])
        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, speedups,
                                 fmt="none", yerr=yerrs, capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # PERFORMANCE
    colors = sns.color_palette("Blues", len(err_fns))
    width = .8 / 3
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), perfs, width=width,
               label=errfn2label(err_fn), color=colors[i])
        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, perfs,
                                 fmt="none", yerr=yerrs, capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")

    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    #   http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight",
                 **kwargs)

def regression_classification(db, output=None, job="xval",
                              table="runtime_classification_results",
                              **kwargs):
    """
    Plot performance of classification using runtime regression.
    """
    jobs = {
        "xval": "10-fold",
        "synthetic_real": "Synthetic",
        "arch": "Device",
        "kern": "Kernel",
        "data": "Dataset",
    }

    results = []
    for job in jobs:
        speedup, serr, perf, perr, time, terr, correct = db.execute(
            "SELECT "
            "    AVG(speedup), CONFERROR(speedup, .95), "
            "    AVG(performance) * 100, CONFERROR(performance, .95) * 100, "
            "    AVG(time) + 2.5, CONFERROR(time, .95), "
            "    AVG(correct) * 100 "
            "FROM {} WHERE job=?".format(table), (job,)).fetchone()
        results.append([job, speedup, serr, perf, perr, time, terr, correct])

    # Zip into lists.
    labels, speedup, serr, perf, perr, time, terr, correct = zip(*results)
    labels = [jobs[x] for x in jobs]

    # Add averages.
    labels.append(r'\textbf{Average}')
    speedup += (labmath.mean(speedup),)
    serr += (labmath.mean(serr),)
    perf += (labmath.mean(perf),)
    perr += (labmath.mean(perr),)
    time += (labmath.mean(time),)
    terr += (labmath.mean(terr),)
    correct += (labmath.mean(correct),)

    X = np.arange(len(labels))
    width = .8

    # PLOT TIMES
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .5)
    ax.set_ylim(0, 150)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Classification time (ms)")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5, time, fmt="none", yerr=terr,
                             capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # SPEEDUPS
    ax = plt.subplot(4, 1, 3)
    ax.bar(X + .1, speedup, width=width, color=sns.color_palette("Greens"))
    ax.set_xticks(X + .5)
    ax.set_ylim(0, 7)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Speedup")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5, speedup, fmt="none", yerr=serr,
                             capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # PERFORMANCE
    ax = plt.subplot(4, 1, 4)
    ax.bar(X + .1, perf, width=width, color=sns.color_palette("Blues"))
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Performance")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 100)
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5, perf, fmt="none", yerr=perr,
                             capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # ACCURACY
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, correct, width=width, color=sns.color_palette("Reds"))
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Accuracy")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 12)

    viz.finalise(output, **kwargs)

def plot_speedups_extended_model_2platform(platform_a, platform_b):
    """ Plot speedup of extended model over Grewe et al for 2 platforms """
    aB = pd.read_csv(platform_a[0])
    aB["synthetic"] = np.zeros(len(aB))
    bB = pd.read_csv(platform_b[0])
    bB["synthetic"] = np.zeros(len(bB))
    B = pd.concat((aB, bB))

    aS = pd.read_csv(platform_a[1])
    aS["synthetic"] = np.ones(len(aS))
    bS = pd.read_csv(platform_b[1])
    bS["synthetic"] = np.ones(len(bS))
    S = pd.concat((aS, bS))

    aBS = pd.concat((aB, aS))
    bBS = pd.concat((bB, bS))
    BS = pd.concat((B, S))

    # sanity checks
    assert len(B) == len(aB) + len(bB)
    assert len(S) == len(aS) + len(bS)
    assert len(BS) == len(aBS) + len(bBS)

    # get benchmark names: <suite>-<benchmark>
    benchmark_names = sorted(set(
        re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)", b).group(1)
        for b in B["benchmark"]
    ))

    # perform cross-validation
    B_out = []
    for i, benchmark in enumerate(benchmark_names):
        print("\ranalyzing", i + 1, benchmark, end="")
        cgo13_clf, our_clf = cgo13.model(), get_our_model()
        cgo13_features, our_features = get_cgo13_features, get_our_features

        # cross validate on Grewe et al. and our model, first platform
        tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                            aBS, aBS, benchmark)
        for d in tmp:
            d["platform"] = "AMD Tahiti 7970"
        B_out += tmp

        # reset models
        cgo13_clf, our_clf = cgo13.model(), get_our_model()

        # same as before, on the other platform
        tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                            bBS, bBS, benchmark)
        for d in tmp:
            d["platform"] = "NVIDIA GTX 970"
        B_out += tmp
    print()

    # create results frame
    R_out = []
    for b in B_out:
        # get runtimes of device using predicted device
        p1_runtime = b["runtime_" + b["p1"].lower()]
        p2_runtime = b["runtime_" + b["p2"].lower()]

        # speedup is the ratio of runtime using our predicted device
        # over runtime using CGO13 predicted device
        b["p_speedup"] = p2_runtime / p1_runtime

        # get the benchmark name
        b["group"] = escape_benchmark_name(b["benchmark"])
        R_out.append(b)

    R = pd.DataFrame(R_out)
    improved = R[R["p_speedup"] > 1]  # kept for inspection (unused)

    Amask = R["platform"] == "AMD Tahiti 7970"
    Bmask = R["platform"] == "NVIDIA GTX 970"
    a = R[Amask]
    b = R[Bmask]
    a_speedups = a.groupby(["group"])["p_speedup"].mean()
    b_speedups = b.groupby(["group"])["p_speedup"].mean()
    a_speedup = labmath.mean(a_speedups)
    b_speedup = labmath.mean(b_speedups)
    assert len(R) == len(a) + len(b)  # sanity check

    print(" #. benchmarks: ", len(set(B["benchmark"])), "kernels,",
          len(B), "observations")
    print(" #. synthetic:  ", len(set(S["benchmark"])), "kernels,",
          len(S), "observations")
    print()
    print(" Speedup on AMD:    {:.2f} x".format(a_speedup))
    print(" Speedup on NVIDIA: {:.2f} x".format(b_speedup))

    palette = sns.cubehelix_palette(
        len(set(R["platform"])), start=4, rot=.8, light=.8, dark=.3)

    # average bars
    R = R.append({
        "group": "Average",
        "p_speedup": a_speedup,
        "platform": "AMD Tahiti 7970"
    }, ignore_index=True)
    R = R.append({
        "group": "Average",
        "p_speedup": b_speedup,
        "platform": "NVIDIA GTX 970"
    }, ignore_index=True)

    R["p_speedup"] -= 1  # negative offset so that bars start at 1

    ax = sns.barplot(x="group", y="p_speedup", hue="platform", data=R,
                     palette=palette, ci=None)

    plt.ylabel("Speedup over Grewe et al.")
    plt.xlabel("")
    plt.axhline(y=0, color="k", lw=1)
    plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1, linestyle="--")
    plt.ylim(-1, 9)
    plt.setp(ax.get_xticklabels(), rotation=90)  # rotate x ticks
    ax.get_legend().set_title("")  # legend
    plt.legend(loc='upper right')
    # counter negative offset
    ax.set_yticklabels([int(i) + 1 for i in ax.get_yticks()])
    ax.get_legend().draw_frame(True)
    viz.finalise(figsize=(9, 4), tight=True)
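# Example invocation (a sketch; the CSV paths are hypothetical). Each
# platform argument is a (benchmarks_csv, synthetic_csv) pair containing
# the benchmark/runtime columns read above:
#
#   plot_speedups_extended_model_2platform(
#       ("data/amd-benchmarks.csv", "data/amd-clgen.csv"),
#       ("data/nvidia-benchmarks.csv", "data/nvidia-clgen.csv"))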
def plot_speedups_extended_model(benchmarks_data, clgen_data):
    """
    Plots speedups of extended model over Grewe et al

    Returns: speedup
    """
    B = pd.read_csv(benchmarks_data)
    B["synthetic"] = np.zeros(len(B))
    S = pd.read_csv(clgen_data)
    S["synthetic"] = np.ones(len(S))
    BS = pd.concat((B, S))
    assert len(BS) == len(B) + len(S)  # sanity check

    # get benchmark names: <suite>-<benchmark>
    benchmark_names = sorted(set(
        re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)", b).group(1)
        for b in B["benchmark"]
    ))

    # perform cross-validation
    B_out = []
    for i, benchmark in enumerate(benchmark_names):
        print("\ranalyzing", i + 1, benchmark, end="")
        cgo13_clf, our_clf = cgo13.model(), get_our_model()
        cgo13_features, our_features = get_cgo13_features, get_our_features

        # cross validate on Grewe et al. and our model
        tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                            BS, BS, benchmark)
        B_out += tmp
    print()

    # create results frame
    R_out = []
    for b in B_out:
        # get runtimes of device using predicted device
        p1_runtime = b["runtime_" + b["p1"].lower()]
        p2_runtime = b["runtime_" + b["p2"].lower()]

        # speedup is the ratio of runtime using our predicted device
        # over runtime using CGO13 predicted device
        b["p_speedup"] = p2_runtime / p1_runtime

        # get the benchmark name
        b["group"] = escape_benchmark_name(b["benchmark"])
        R_out.append(b)

    R = pd.DataFrame(R_out)
    improved = R[R["p_speedup"] > 1]  # kept for inspection (unused)

    speedups = R.groupby(["group"])["p_speedup"].mean()
    speedup = labmath.mean(speedups)

    print(" #. benchmarks: ", len(set(B["benchmark"])), "kernels,",
          len(B), "observations")
    print(" #. synthetic:  ", len(set(S["benchmark"])), "kernels,",
          len(S), "observations")
    print()
    print(" Speedup: {:.2f} x".format(speedup))

    palette = sns.cubehelix_palette(1, start=4, rot=.8, light=.8, dark=.3)

    # average bar
    R = R.append({
        "group": "Average",
        "p_speedup": speedup
    }, ignore_index=True)

    R["p_speedup"] -= 1  # negative offset so that bars start at 1

    ax = sns.barplot(x="group", y="p_speedup", data=R, palette=palette,
                     ci=None)

    plt.ylabel("Speedup over Grewe et al.")
    plt.xlabel("")
    plt.axhline(y=0, color="k", lw=1)
    plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1, linestyle="--")
    plt.ylim(-1, 9)
    plt.setp(ax.get_xticklabels(), rotation=90)  # rotate x ticks
    # counter negative offset
    ax.set_yticklabels([int(i) + 1 for i in ax.get_yticks()])
    viz.finalise(figsize=(7, 3.7), tight=True)
    return speedup
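# Example invocation (a sketch; the paths are hypothetical). The mean
# per-benchmark speedup is returned, so the caller can log it or assert
# against it:
#
#   speedup = plot_speedups_extended_model("data/benchmarks.csv",
#                                          "data/clgen.csv")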
def test_finalise_tight(self):
    self._mkplot()
    viz.finalise("/tmp/labm8.png", tight=True)
    self.assertTrue(fs.exists("/tmp/labm8.png"))
    fs.rm("/tmp/labm8.png")
def test_finalise():
    _MakeTestPlot()
    viz.finalise("/tmp/labm8.png")
    assert fs.exists("/tmp/labm8.png")
    fs.rm("/tmp/labm8.png")
def test_finalise_figsize(self):
    self._mkplot()
    viz.finalise("/tmp/labm8.png", figsize=(10, 5))
    self.assertTrue(fs.exists("/tmp/labm8.png"))
    fs.rm("/tmp/labm8.png")
def test_finalise_figsize():
    _MakeTestPlot()
    viz.finalise("/tmp/labm8.png", figsize=(10, 5))
    assert fs.exists("/tmp/labm8.png")
    fs.rm("/tmp/labm8.png")
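# For orientation, a minimal viz.finalise consistent with how it is
# called throughout this file might look like the following. This is a
# sketch of the assumed behaviour, not the actual labm8 implementation;
# the mapping of `additional_artists` onto savefig's bbox_extra_artists
# is an assumption:
import matplotlib.pyplot as plt

def finalise(output=None, figsize=None, tight=False, **kwargs):
    """Save the current figure to `output`, or show it if None."""
    artists = kwargs.pop("additional_artists", None)
    if figsize is not None:
        plt.gcf().set_size_inches(*figsize)
    if tight:
        plt.tight_layout()
    if output is None:
        plt.show()
    else:
        # Remaining kwargs (e.g. bbox_inches) are forwarded to savefig().
        plt.savefig(output, bbox_extra_artists=artists, **kwargs)
    plt.close()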
def classification(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    base_err_fn = err_fns[0]

    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier", (job, base_err_fn)
    )

    results = []

    # Add baseline results.
    baseline = "4x4"
    correct = db.execute(
        "SELECT Count(*) * 1.0 / 3 FROM classification_results "
        "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?",
            (job, baseline)
        )
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    results.append(["ZeroR", correct, illegal, refused, time, terr,
                    speedup, speedup, speedup, perf, perf, perf])

    # Get results for each classifier.
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "  (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "  (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "  (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "  AVG(time) + 2.5,\n"
            "  CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)
        ).fetchone()

        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "  AVG(speedup),\n"
                "  CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]

        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "  AVG(performance) * 100.0,\n"
                "  CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] +
                       speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(*[
        (text.truncate(result[0], 40), result[1], result[2],
         result[3], result[4], result[5])
        for result in results
    ])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")

    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5, time, fmt="none", yerr=terr,
                             capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, illegal, width=width,
           color=sns.color_palette("Reds", 1), label="Illegal")
    ax.bar(X + .1 + width, refused, width=width,
           color=sns.color_palette("Oranges", 1), label="Refused")
    ax.bar(X + .1 + 2 * width, correct, width=width,
           color=sns.color_palette("Blues", 1), label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    # Collect every legend artist so they are all included in the tight
    # bounding box when saving (previously each assignment overwrote the
    # last).
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # SPEEDUPS
    ax = plt.subplot(4, 1, 3)
    width = (.8 / 3)
    colors = sns.color_palette("Greens", len(err_fns))
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), speedups, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, speedups,
                                 fmt="none", yerr=yerrs, capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)

    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    art.append(plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3))

    # PERFORMANCE
    colors = sns.color_palette("Blues", len(err_fns))
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), perfs, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, perfs,
                                 fmt="none", yerr=yerrs, capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)

    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")

    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    #   http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art.append(plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3))
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
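# The errorbar cap-styling idiom above recurs throughout this module; a
# hypothetical helper that would factor it out (not part of the original
# code):
def black_caps(ax, x, y, yerr):
    """Draw error bars with black 1pt caps and no marker at `x`, `y`."""
    _, caps, _ = ax.errorbar(x, y, fmt="none", yerr=yerr,
                             capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)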