Example #1
def summarize(csv_path: str) -> OrderedDict:
    """
    Summarize a CSV file of feature values.

    Parameters
    ----------
    csv_path : str
        Path to the CSV file.

    Returns
    -------
    OrderedDict
        Summary values.
    """
    with open(csv_path) as infile:
        reader = csv.reader(infile)
        table = [row for row in reader]

    d = OrderedDict()
    ignored_cols = 2
    d['datapoints'] = len(table) - 1
    for i, col in enumerate(table[0][ignored_cols:]):
        i += ignored_cols
        d[col] = labmath.mean([float(r[i]) for r in table[1:]])

    return d
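
A minimal, self-contained sketch of the same column-averaging logic, using statistics.mean from the standard library as a stand-in for labmath.mean and a made-up in-memory CSV (both are assumptions for illustration, not part of the example above):

import csv
import io
from collections import OrderedDict
from statistics import mean  # stand-in for labmath.mean

# Hypothetical table: the first two columns are skipped, matching ignored_cols = 2.
csv_text = "name,run,f1,f2\na,1,1.0,4.0\nb,2,3.0,8.0\n"
table = list(csv.reader(io.StringIO(csv_text)))

d = OrderedDict()
ignored_cols = 2
d['datapoints'] = len(table) - 1
for i, col in enumerate(table[0][ignored_cols:], start=ignored_cols):
    d[col] = mean(float(row[i]) for row in table[1:])

print(d)  # OrderedDict([('datapoints', 2), ('f1', 2.0), ('f2', 6.0)])
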
Example #2
    def profile(self,
                queue,
                size: int = 16,
                must_validate: bool = False,
                out=sys.stdout,
                metaout=sys.stderr,
                min_num_iterations: int = 10):
        """
        Run kernel and profile runtime.

        Output format (CSV):

            out:      <kernel> <wgsize> <transfer> <runtime> <ci>
            metaout:  <error> <kernel>
        """
        assert (isinstance(queue, cl.CommandQueue))

        if must_validate:
            try:
                self.validate(queue, size)
            except CLDriveException as e:
                print(type(e).__name__, self.name, sep=',', file=metaout)

        P = KernelPayload.create_random(self, size)
        k = partial(self, queue)

        while len(self.runtimes) < min_num_iterations:
            k(P)

        wgsize = int(round(labmath.mean(self.wgsizes)))
        transfer = int(round(labmath.mean(self.transfers)))
        mean = labmath.mean(self.runtimes)
        ci = labmath.confinterval(self.runtimes, array_mean=mean)[1] - mean
        print(self.name,
              wgsize,
              transfer,
              round(mean, 6),
              round(ci, 6),
              sep=',',
              file=out)
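
The ci value above is the distance from the sample mean to the upper bound of a confidence interval over the runtimes. A rough, self-contained sketch of that quantity under a normal approximation (z = 1.96); the exact method used by labmath.confinterval is not shown in this example, so treat this as an assumption:

import math
import statistics

def ci_halfwidth_95(values):
    # 95% confidence-interval half-width under a normal approximation.
    sem = statistics.stdev(values) / math.sqrt(len(values))
    return 1.96 * sem

runtimes = [1.02, 0.98, 1.05, 1.01, 0.99]  # hypothetical runtimes (seconds)
mean_runtime = statistics.mean(runtimes)
print(round(mean_runtime, 6), round(ci_halfwidth_95(runtimes), 6))
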
Example #3
def summarize(csv_path):
    with open(csv_path) as infile:
        reader = csv.reader(infile)
        table = [row for row in reader]

    d = OrderedDict()
    ignored_cols = 2
    d['datapoints'] = len(table) - 1
    for i,col in enumerate(table[0][ignored_cols:]):
        i += ignored_cols
        d[col] = labmath.mean([float(r[i]) for r in table[1:]])

    return d
Example #4
def plot_speedups_extended_model(benchmarks_data, clgen_data):
  """
  Plots speedups of the extended model over Grewe et al.

  Returns: speedup
  """
  B = pd.read_csv(benchmarks_data)
  B["synthetic"] = np.zeros(len(B))

  S = pd.read_csv(clgen_data)
  S["synthetic"] = np.ones(len(S))

  BS = pd.concat((B, S))

  assert (len(BS) == len(B) + len(S))

  # get benchmark names: <suite>-<version>-<benchmark>
  benchmark_names = sorted(set([
    re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)", b).group(1)
    for b in B["benchmark"]
  ]))

  # perform cross-validation
  B_out = []
  for i, benchmark in enumerate(benchmark_names):
    print("\ranalyzing", i + 1, benchmark, end="")
    cgo13_clf, our_clf = cgo13.model(), get_our_model()
    cgo13_features, our_features = get_cgo13_features, get_our_features

    # cross validate on Grewe et al. and our model
    tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                        BS, BS, benchmark)
    B_out += tmp
  print()

  # create results frame
  R_out = []
  # get the runtime on each model's predicted device
  for b in B_out:
    p1_runtime = b["runtime_" + b["p1"].lower()]
    p2_runtime = b["runtime_" + b["p2"].lower()]

    # speedup is the ratio of runtime using our predicted device
    # over runtime using CGO13 predicted device.
    b["p_speedup"] = p2_runtime / p1_runtime

    # get the benchmark name
    b["group"] = escape_benchmark_name(b["benchmark"])

    R_out.append(b)
  R = pd.DataFrame(R_out)

  improved = R[R["p_speedup"] > 1]

  speedups = R.groupby(["group"])["p_speedup"].mean()
  speedup = labmath.mean(speedups)

  print("  #. benchmarks:          ",
        len(set(B["benchmark"])), "kernels,", len(B), "observations")
  print("  #. synthetic:           ",
        len(set(S["benchmark"])), "kernels,", len(S), "observations")
  print()
  print("  Speedup:                 {:.2f} x".format(speedup))

  palette = sns.cubehelix_palette(1, start=4, rot=.8, light=.8, dark=.3)

  R = R.append({  # average bar
    "group": "Average",
    "p_speedup": speedup
  }, ignore_index=True)

  R["p_speedup"] -= 1  # negative offset so that bars start at 1

  ax = sns.barplot(x="group", y="p_speedup", data=R,
                   palette=palette, ci=None)

  plt.ylabel("Speedup over Grewe et al.");
  plt.xlabel("")

  plt.axhline(y=0, color="k", lw=1)
  plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1, linestyle="--")
  plt.ylim(-1, 9)
  plt.setp(ax.get_xticklabels(), rotation=90)  # rotate x ticks

  # counter negative offset
  ax.set_yticklabels([int(i) + 1 for i in ax.get_yticks()])

  viz.finalise(figsize=(7, 3.7), tight=True)
  return speedup
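
The speedup reported above is computed hierarchically: observations are first averaged within each benchmark group, then the per-group means are averaged. A small stand-alone sketch of that aggregation with made-up group names and values (statistics.mean stands in for labmath.mean):

import pandas as pd
from statistics import mean  # stand-in for labmath.mean

# Hypothetical per-observation speedups, several observations per benchmark.
R = pd.DataFrame({
    "group": ["npb-3.3-FT", "npb-3.3-FT", "rodinia-3.1-nw", "rodinia-3.1-nw"],
    "p_speedup": [1.0, 1.5, 0.5, 1.5],
})

per_group = R.groupby(["group"])["p_speedup"].mean()  # one mean per benchmark
overall = mean(per_group)                             # then mean of means
print(per_group.to_dict(), overall)  # {'npb-3.3-FT': 1.25, 'rodinia-3.1-nw': 1.0} 1.125
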
Example #5
def plot_speedups_extended_model_2platform(platform_a, platform_b):
  """
  Plot speedup of the extended model over Grewe et al. for 2 platforms.
  """
  aB = pd.read_csv(platform_a[0])
  aB["synthetic"] = np.zeros(len(aB))
  bB = pd.read_csv(platform_b[0])
  bB["synthetic"] = np.zeros(len(bB))
  B = pd.concat((aB, bB))

  aS = pd.read_csv(platform_a[1])
  aS["synthetic"] = np.ones(len(aS))
  bS = pd.read_csv(platform_b[1])
  bS["synthetic"] = np.ones(len(bS))
  S = pd.concat((aS, bS))

  aBS = pd.concat((aB, aS))
  bBS = pd.concat((bB, bS))
  BS = pd.concat((B, S))

  assert (len(B) == len(aB) + len(bB))  # sanity checks
  assert (len(S) == len(aS) + len(bS))
  assert (len(BS) == len(aBS) + len(bBS))

  # get benchmark names: <suite>-<version>-<benchmark>
  benchmark_names = sorted(set([
    re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)", b).group(1)
    for b in B["benchmark"]
  ]))

  # perform cross-validation
  B_out = []
  for i, benchmark in enumerate(benchmark_names):
    print("\ranalyzing", i + 1, benchmark, end="")
    cgo13_clf, our_clf = cgo13.model(), get_our_model()
    cgo13_features, our_features = get_cgo13_features, get_our_features

    # cross validate on Grewe et al. and our model
    tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                        aBS, aBS, benchmark)
    for d in tmp: d["platform"] = "AMD Tahiti 7970"
    B_out += tmp

    # reset models
    cgo13_clf, our_clf = cgo13.model(), get_our_model()

    # same as before, on other platform:
    tmp = _compare_clfs(cgo13_clf, cgo13_features, our_clf, our_features,
                        bBS, bBS, benchmark)
    for d in tmp: d["platform"] = "NVIDIA GTX 970"
    B_out += tmp
  print()

  # create results frame
  R_out = []
  # get the runtime on each model's predicted device
  for b in B_out:
    p1_runtime = b["runtime_" + b["p1"].lower()]
    p2_runtime = b["runtime_" + b["p2"].lower()]

    # speedup is the ratio of runtime using our predicted device
    # over runtime using CGO13 predicted device.
    b["p_speedup"] = p2_runtime / p1_runtime

    # get the benchmark name
    b["group"] = escape_benchmark_name(b["benchmark"])

    R_out.append(b)
  R = pd.DataFrame(R_out)

  improved = R[R["p_speedup"] > 1]

  Amask = R["platform"] == "AMD Tahiti 7970"
  Bmask = R["platform"] == "NVIDIA GTX 970"
  a = R[Amask]
  b = R[Bmask]

  a_speedups = a.groupby(["group"])["p_speedup"].mean()
  b_speedups = b.groupby(["group"])["p_speedup"].mean()

  a_speedup = labmath.mean(a_speedups)
  b_speedup = labmath.mean(b_speedups)

  assert (len(R) == len(a) + len(b))  # sanity-check

  print("  #. benchmarks:          ",
        len(set(B["benchmark"])), "kernels,", len(B), "observations")
  print("  #. synthetic:           ",
        len(set(S["benchmark"])), "kernels,", len(S), "observations")
  print()
  print("  Speedup on AMD:          {:.2f} x".format(a_speedup))
  print("  Speedup on NVIDIA:       {:.2f} x".format(b_speedup))

  palette = sns.cubehelix_palette(
      len(set(R["platform"])), start=4, rot=.8, light=.8, dark=.3)

  R = R.append({  # average bars
    "group": "Average",
    "p_speedup": a_speedup,
    "platform": "AMD Tahiti 7970"
  }, ignore_index=True)
  R = R.append({
    "group": "Average",
    "p_speedup": b_speedup,
    "platform": "NVIDIA GTX 970"
  }, ignore_index=True)

  R["p_speedup"] -= 1  # negative offset so that bars start at 1

  ax = sns.barplot(x="group", y="p_speedup", hue="platform", data=R,
                   palette=palette, ci=None)

  plt.ylabel("Speedup over Grewe et al.");
  plt.xlabel("")

  plt.axhline(y=0, color="k", lw=1)
  plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1, linestyle="--")
  plt.ylim(-1, 9)
  plt.setp(ax.get_xticklabels(), rotation=90)  # rotate x ticks
  ax.get_legend().set_title("")  # legend
  plt.legend(loc='upper right')

  # counter negative offset
  ax.set_yticklabels([int(i) + 1 for i in ax.get_yticks()])

  ax.get_legend().draw_frame(True)

  viz.finalise(figsize=(9, 4), tight=True)
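
Examples #4-#6 share a small plotting trick: 1 is subtracted from every speedup so the bars grow up or down from the 1x line, and the y tick labels are then shifted back so the axis still reads in absolute speedups. A minimal sketch of that idiom with made-up data (ci=None follows the seaborn version these examples target):

import matplotlib
matplotlib.use("Agg")  # render off-screen
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

R = pd.DataFrame({"group": ["a", "b", "c"], "p_speedup": [1.5, 0.8, 2.0]})

R["p_speedup"] -= 1  # offset so a speedup of 1x sits at y=0
ax = sns.barplot(x="group", y="p_speedup", data=R, ci=None)
plt.axhline(y=0, color="k", lw=1)  # the 1x baseline
# counter the offset in the tick labels so the axis reads absolute speedups
ax.set_yticklabels([round(t + 1, 1) for t in ax.get_yticks()])
plt.savefig("offset_bars.png")
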
Example #6
def plot_speedups_with_clgen(benchmarks_data, clgen_data, suite="npb"):
  """
  Plot speedups of predictive models trained with and without clgen.

  Returns speedups (without and with).
  """
  # datasets: B - benchmarks, S - synthetics, BS - benchmarks + synthetics:
  B = pd.read_csv(benchmarks_data)
  B["group"] = ["B"] * len(B)

  S = pd.read_csv(clgen_data)
  S["group"] = ["S"] * len(S)

  BS = pd.concat((B, S))

  # find the ZeroR. This is the device which is most frequently optimal
  Bmask = B[B["benchmark"].str.contains(suite)]
  zeror = Counter(Bmask["oracle"]).most_common(1)[0][0]
  zeror_runtime = "runtime_" + zeror.lower()

  # get the names of the benchmarks, in the form: $suite-$version-$benchmark
  benchmark_names = sorted(set([
    re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)-", b).group(1)
    for b in B["benchmark"] if b.startswith(suite)
  ]))

  B_out, BS_out = [], []
  for benchmark in benchmark_names:
    clf = cgo13.model()
    features = get_cgo13_features
    # cross validate on baseline
    B_out += cgo13.leave_one_benchmark_out(clf, features, B, benchmark)
    # reset model
    clf = cgo13.model()
    # repeat cross-validation with synthetic kernels
    BS_out += cgo13.leave_one_benchmark_out(clf, features, BS, benchmark)

  # create results frame
  R_out = []
  for b, bs in zip(B_out, BS_out):
    # get the runtime on each model's predicted device
    b_p_runtime = b["runtime_" + b["p"].lower()]
    bs_p_runtime = bs["runtime_" + bs["p"].lower()]

    # speedup is the ratio of runtime using the predicted device
    # over runtime using ZeroR device
    b["p_speedup"] = b_p_runtime / b[zeror_runtime]
    bs["p_speedup"] = bs_p_runtime / bs[zeror_runtime]

    if "training" in benchmarks_data:
      # $benchmark
      group = escape_benchmark_name(b["benchmark"])
    else:
      # $benchmark.$dataset
      group = re.sub(r"[^-]+-[0-9\.]+-([^-]+)-.+", r"\1",
                     b["benchmark"]) + "." + b["dataset"]
    b["group"] = group
    bs["group"] = group

    # set the training data type
    b["training"] = "Grewe et al."
    bs["training"] = "w. CLgen"

    R_out.append(b)
    R_out.append(bs)

  R = pd.DataFrame(R_out)

  b_mask = R["training"] == "Grewe et al."
  bs_mask = R["training"] == "w. CLgen"

  B_speedup = labmath.mean(R[b_mask].groupby(["group"])["p_speedup"].mean())
  BS_speedup = labmath.mean(R[bs_mask].groupby(["group"])["p_speedup"].mean())

  print("  #. benchmarks:                  ",
        len(set(B["benchmark"])), "kernels,", len(B), "observations")
  print("  #. synthetic:                   ",
        len(set(S["benchmark"])), "kernels,", len(S), "observations")
  print()
  print("  ZeroR device:                    {}".format(zeror))
  print()
  print("  Speedup of Grewe et al.:         {:.2f} x".format(B_speedup))
  print("  Speedup w. CLgen:                {:.2f} x".format(BS_speedup))

  R = R.append({  # average bars
    "group": "Average",
    "p_speedup": B_speedup,
    "training": "Grewe et al."
  }, ignore_index=True)
  R = R.append({
    "group": "Average",
    "p_speedup": BS_speedup,
    "training": "w. CLgen"
  }, ignore_index=True)

  R["p_speedup"] -= 1  # negative offset so that bars start at 1

  # colors
  palette = sns.cubehelix_palette(len(set(R["training"])),
                                  rot=-.4, light=.85, dark=.35)

  ax = sns.barplot(
      x="group", y="p_speedup", data=R, ci=None, hue="training",
      palette=palette)
  plt.ylabel("Speedup")
  plt.xlabel("")

  plt.axhline(y=0, color="k", lw=1)  # speedup line
  plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1,
              linestyle="--")  # average line

  ax.get_legend().set_title("")  # no legend title
  plt.legend(loc='upper right')
  ax.get_legend().draw_frame(True)

  # plot shape and size
  figsize = (9, 2.2)
  if "nvidia" in benchmarks_data:
    typecast = int
    plt.ylim(-1, 16)
  elif "training" in benchmarks_data:
    typecast = float
    figsize = (7, 3.2)
  else:
    typecast = float

  # counter negative offset:
  ax.set_yticklabels([typecast(i) + 1 for i in ax.get_yticks()])

  plt.setp(ax.get_xticklabels(), rotation=90)

  viz.finalise(figsize=figsize, tight=True)
  return B_speedup, BS_speedup
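
The ZeroR baseline in the example above is simply the device that is most frequently optimal across the suite; every prediction is then compared against always choosing that device. A tiny sketch of that selection (the device names are made up):

from collections import Counter

# Hypothetical oracle labels: the device that was actually fastest for
# each observation in the benchmark suite.
oracle = ["GPU", "GPU", "CPU", "GPU", "CPU", "GPU"]

zeror = Counter(oracle).most_common(1)[0][0]  # most frequently optimal device
zeror_runtime = "runtime_" + zeror.lower()    # column name, e.g. "runtime_gpu"
print(zeror, zeror_runtime)
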
Example #7
 def test_mean(self):
     self._test(2, labmath.mean([1,2,3]))
     self._test((1/3.), labmath.mean([1,1.5,-1.5]))
     self._test(2, labmath.mean([2,2,2,2,2]))
     self._test(2.5, labmath.mean([1,2,3,4]))
Example #8
 def test_mean_single_item_array(self):
     self._test(1, labmath.mean([1]))
Example #9
 def test_mean_empty_array(self):
     self._test(0, labmath.mean([]))
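
Examples #7-#9 pin down the behaviour expected of labmath.mean: the ordinary arithmetic mean, with 0 returned for an empty sequence. A stand-in consistent with those tests (an assumption about the contract, not labm8's actual implementation):

def mean_or_zero(values):
    # Arithmetic mean; 0 for an empty sequence, as the tests above expect.
    values = list(values)
    if not values:
        return 0
    return sum(values) / len(values)

assert mean_or_zero([1, 2, 3]) == 2
assert abs(mean_or_zero([1, 1.5, -1.5]) - 1 / 3.) < 1e-9
assert mean_or_zero([1]) == 1
assert mean_or_zero([]) == 0
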
Example #10
def regression_classification(db, output=None, job="xval",
                              table="runtime_classification_results",
                              **kwargs):
    """
    Plot performance of classification using runtime regression.
    """
    jobs = {
        "xval": "10-fold",
        "synthetic_real": "Synthetic",
        "arch": "Device",
        "kern": "Kernel",
        "data": "Dataset",
    }

    results = []
    for job in jobs:
        speedup, serr, perf, perr, time, terr, correct = db.execute(
            "SELECT "
            "  AVG(speedup), CONFERROR(speedup, .95), "
            "  AVG(performance) * 100, CONFERROR(performance, .95) * 100, "
            "  AVG(time) + 2.5, CONFERROR(time, .95), "
            "  AVG(correct) * 100 "
            "FROM {} WHERE job=?".format(table),
            (job,)
        ).fetchone()
        results.append([job, speedup, serr, perf, perr, time, terr, correct])

    # Zip into lists.
    labels, speedup, serr, perf, perr, time, terr, correct = zip(*results)
    labels = [jobs[x] for x in jobs]

    # Add averages.
    labels.append(r'\textbf{Average}')
    speedup += (labmath.mean(speedup),)
    serr += (labmath.mean(serr),)
    perf += (labmath.mean(perf),)
    perr += (labmath.mean(perr),)
    time += (labmath.mean(time),)
    terr += (labmath.mean(terr),)
    correct += (labmath.mean(correct),)

    X = np.arange(len(labels))

    width = .8

    # PLOT TIMES
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .5)
    ax.set_ylim(0, 150)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Classification time (ms)")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _,caps,_ = ax.errorbar(X + .5, time,
                           fmt="none", yerr=terr, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # SPEEDUPS
    ax = plt.subplot(4, 1, 3)
    ax.bar(X + .1, speedup, width=width, color=sns.color_palette("Greens"))
    ax.set_xticks(X + .5)
    ax.set_ylim(0, 7)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Speedup")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _,caps,_ = ax.errorbar(X + .5, speedup,
                           fmt="none", yerr=serr, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # PERFORMANCE
    ax = plt.subplot(4, 1, 4)
    ax.bar(X + .1, perf, width=width, color=sns.color_palette("Blues"))
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Performance")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 100)
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _,caps,_ = ax.errorbar(X + .5, perf,
                           fmt="none", yerr=perr, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # ACCURACY
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, correct, width=width, color=sns.color_palette("Reds"))
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Accuracy")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 12)

    viz.finalise(output, **kwargs)
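
The examples above and below repeatedly draw the bars and their confidence intervals separately so the error-bar caps can be styled by hand. A stripped-down, stand-alone sketch of that pattern with made-up values (here the error bars sit at the bar centres; the +.5 offsets in the originals account for edge-aligned bars):

import matplotlib
matplotlib.use("Agg")  # render off-screen
import matplotlib.pyplot as plt
import numpy as np

X = np.arange(3)
values = [2.0, 3.5, 1.5]  # hypothetical bar heights
errs = [0.2, 0.4, 0.1]    # hypothetical confidence-interval half-widths

ax = plt.subplot(1, 1, 1)
ax.bar(X, values, width=.8)
# Plot confidence intervals separately for full control over formatting.
_, caps, _ = ax.errorbar(X, values, fmt="none", yerr=errs,
                         capsize=3, ecolor="k")
for cap in caps:
    cap.set_color('k')
    cap.set_markeredgewidth(1)
plt.savefig("bars_with_cis.png")
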
Example #11
def classification(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    base_err_fn = err_fns[0]
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier",
        (job,base_err_fn)
    )
    results = []

    # Add baseline results.
    baseline = "4x4"
    correct = db.execute("SELECT Count(*) * 1.0 / 3 FROM classification_results "
                         "WHERE job=? AND actual=?", (job,baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in
        db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?",
            (job, baseline)
        )
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    results.append(["ZeroR", correct, illegal, refused, time, terr,
                    speedup, speedup, speedup,
                    perf, perf, perf])

    # Get results
    for classifier,count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)
        ).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] + speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(*[
        (text.truncate(result[0], 40), result[1], result[2],
         result[3], result[4], result[5])
        for result in results
    ])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _,caps,_ = ax.errorbar(X + .5, time,
                           fmt="none", yerr=terr, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, illegal, width=width,
           color=sns.color_palette("Reds", 1), label="Illegal")
    ax.bar(X + .1 + width, refused, width=width,
           color=sns.color_palette("Oranges", 1), label="Refused")
    ax.bar(X + .1 + 2 * width, correct, width=width,
           color=sns.color_palette("Blues", 1), label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = (.8 / 3)
    colors=sns.color_palette("Greens", len(err_fns))
    for i,err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), speedups, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _,caps,_ = ax.errorbar(X + .1 + (i + .5) * width, speedups,
                               fmt="none", yerr=yerrs, capsize=3, ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_xticks(X + .4, labels)
    ax.set_ylabel("Speedup")
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # PERFORMANCE
    colors=sns.color_palette("Blues", len(err_fns))
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 4)
    for i,err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), perfs, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _,caps,_ = ax.errorbar(X + .1 + (i + .5) * width, perfs,
                               fmt="none", yerr=yerrs, capsize=3, ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")
    ax.set_xticks(X + .4, labels)


    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
Example #12
def regression_classification(db,
                              output=None,
                              job="xval",
                              table="runtime_classification_results",
                              **kwargs):
    """
    Plot performance of classification using runtime regression.
    """
    jobs = {
        "xval": "10-fold",
        "synthetic_real": "Synthetic",
        "arch": "Device",
        "kern": "Kernel",
        "data": "Dataset",
    }

    results = []
    for job in jobs:
        speedup, serr, perf, perr, time, terr, correct = db.execute(
            "SELECT "
            "  AVG(speedup), CONFERROR(speedup, .95), "
            "  AVG(performance) * 100, CONFERROR(performance, .95) * 100, "
            "  AVG(time) + 2.5, CONFERROR(time, .95), "
            "  AVG(correct) * 100 "
            "FROM {} WHERE job=?".format(table), (job, )).fetchone()
        results.append([job, speedup, serr, perf, perr, time, terr, correct])

    # Zip into lists.
    labels, speedup, serr, perf, perr, time, terr, correct = zip(*results)
    labels = [jobs[x] for x in jobs]

    # Add averages.
    labels.append(r'\textbf{Average}')
    speedup += (labmath.mean(speedup), )
    serr += (labmath.mean(serr), )
    perf += (labmath.mean(perf), )
    perr += (labmath.mean(perr), )
    time += (labmath.mean(time), )
    terr += (labmath.mean(terr), )
    correct += (labmath.mean(correct), )

    X = np.arange(len(labels))

    width = .8

    # PLOT TIMES
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .5)
    ax.set_ylim(0, 150)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Classification time (ms)")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5,
                             time,
                             fmt="none",
                             yerr=terr,
                             capsize=3,
                             ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # SPEEDUPS
    ax = plt.subplot(4, 1, 3)
    ax.bar(X + .1, speedup, width=width, color=sns.color_palette("Greens"))
    ax.set_xticks(X + .5)
    ax.set_ylim(0, 7)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Speedup")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5,
                             speedup,
                             fmt="none",
                             yerr=serr,
                             capsize=3,
                             ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # PERFORMANCE
    ax = plt.subplot(4, 1, 4)
    ax.bar(X + .1, perf, width=width, color=sns.color_palette("Blues"))
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Performance")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 100)
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5,
                             perf,
                             fmt="none",
                             yerr=perr,
                             capsize=3,
                             ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # ACCURACY
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, correct, width=width, color=sns.color_palette("Reds"))
    ax.set_xticks(X + .5)
    ax.set_xticklabels(labels, rotation='vertical')
    ax.set_ylabel("Accuracy")
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_ylim(0, 12)

    viz.finalise(output, **kwargs)
Example #13
def classification(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    base_err_fn = err_fns[0]
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier", (job, base_err_fn))
    results = []

    # Add baseline results.
    baseline = "4x4"
    correct = db.execute(
        "SELECT Count(*) * 1.0 / 3 FROM classification_results "
        "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?", (job, baseline))
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    results.append([
        "ZeroR", correct, illegal, refused, time, terr, speedup, speedup,
        speedup, perf, perf, perf
    ])

    # Get results
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone() for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone() for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] +
                       speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(
        *[(text.truncate(result[0], 40), result[1], result[2], result[3],
           result[4], result[5]) for result in results])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5,
                             time,
                             fmt="none",
                             yerr=terr,
                             capsize=3,
                             ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1,
           illegal,
           width=width,
           color=sns.color_palette("Reds", 1),
           label="Illegal")
    ax.bar(X + .1 + width,
           refused,
           width=width,
           color=sns.color_palette("Oranges", 1),
           label="Refused")
    ax.bar(X + .1 + 2 * width,
           correct,
           width=width,
           color=sns.color_palette("Blues", 1),
           label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = (.8 / 3)
    colors = sns.color_palette("Greens", len(err_fns))
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width),
               speedups,
               width=width,
               label=errfn2label(err_fn),
               color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width,
                                 speedups,
                                 fmt="none",
                                 yerr=yerrs,
                                 capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_xticks(X + .4, labels)
    ax.set_ylabel("Speedup")
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # PERFORMANCE
    colors = sns.color_palette("Blues", len(err_fns))
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width),
               perfs,
               width=width,
               label=errfn2label(err_fn),
               color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width,
                                 perfs,
                                 fmt="none",
                                 yerr=yerrs,
                                 capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")
    ax.set_xticks(X + .4, labels)

    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)