Example #1
def err_fn_speedups(db, err_fn, output=None, sort=False, job="xval", **kwargs):
    """
    Plot speedup over the baseline of all classifiers for an err_fn.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for classifier in db.classification_classifiers:
        basename = ml.classifier_basename(classifier)
        performances = [
            row for row in db.execute(
                "SELECT speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?", (job, classifier,
                                                              err_fn))
        ]
        if sort: performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=basename)
    plt.plot([1 for _ in performances], "-", label="ZeroR")

    title = kwargs.pop("title", err_fn)
    ax.set_yscale("log")
    plt.title(title)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Test instances")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)
Example #2
def err_fn_speedups(db, err_fn, output=None, sort=False,
                    job="xval", **kwargs):
    """
    Plot speedup over the baseline of all classifiers for an err_fn.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for classifier in db.classification_classifiers:
        basename = ml.classifier_basename(classifier)
        performances = [row for row in
                        db.execute("SELECT speedup\n"
                                   "FROM classification_results\n"
                                   "WHERE job=? AND classifier=? AND err_fn=?",
                                   (job, classifier, err_fn))]
        if sort: performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=basename)
    plt.plot([1 for _ in performances], "-", label="ZeroR")

    title = kwargs.pop("title", err_fn)
    ax.set_yscale("log")
    plt.title(title)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Test instances")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)
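A minimal self-contained sketch of the query-then-plot pattern used above, with an in-memory sqlite3 table standing in for the project's db wrapper (the job, classifier, and err_fn values here are illustrative):

import sqlite3
import matplotlib.pyplot as plt

# Stand-in for the project's database: one table with the columns the
# query above selects and filters on.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE classification_results "
             "(job TEXT, classifier TEXT, err_fn TEXT, speedup REAL)")
conn.executemany(
    "INSERT INTO classification_results VALUES (?,?,?,?)",
    [("xval", "weka.classifiers.rules.ZeroR", "min_fn", s)
     for s in (1.0, 1.2, 0.8, 2.5, 1.7)])

# Same parameterised query and sorted log-scale plot as err_fn_speedups.
rows = conn.execute(
    "SELECT speedup FROM classification_results "
    "WHERE job=? AND classifier=? AND err_fn=?",
    ("xval", "weka.classifiers.rules.ZeroR", "min_fn")).fetchall()
performances = sorted((row[0] for row in rows), reverse=True)

plt.plot(performances, "-", label="ZeroR")
plt.yscale("log")
plt.ylabel("Speedup (log)")
plt.xlabel("Test instances")
plt.legend()
plt.show()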
Example #3
def classifier_speedups(db,
                        classifier,
                        output=None,
                        sort=False,
                        job="xval_classifiers",
                        **kwargs):
    """
    Plot speedup over the baseline of a classifier for each err_fn.
    """
    for err_fn in db.err_fns:
        performances = [
            row for row in db.execute(
                "SELECT speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?", (job, classifier,
                                                              err_fn))
        ]
        if sort: performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=err_fn)

    basename = ml.classifier_basename(classifier)
    plt.title(basename)
    plt.ylabel("Speedup")
    plt.xlabel("Test instances")
    plt.axhline(y=1, color="k")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)
Example #4
def eval_classifiers(db, classifiers, err_fns, job, training, testing):
    """
    Cross validate a set of classifiers and err_fns.
    """
    for classifier in classifiers:
        meta = Classifier(classifier)
        prof.start("train classifier")
        meta.build_classifier(training)
        prof.stop("train classifier")
        basename = ml.classifier_basename(classifier.classname)

        for err_fn in err_fns:
            io.debug(job, basename, err_fn.func.__name__, testing.num_instances)
            for j,instance in enumerate(testing):
                eval_classifier_instance(job, db, meta, instance, err_fn,
                                         training)
            db.commit()
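prof.start() and prof.stop() above bracket training with a labelled timer. A rough stand-in using time.perf_counter, with a dummy classifier (all names here are hypothetical; the real prof module is project-specific):

import time

# Hypothetical stand-in for the prof.start()/prof.stop() labelled timers.
_timers = {}

def start(label):
    _timers[label] = time.perf_counter()

def stop(label):
    print("{}: {:.3f}s".format(label, time.perf_counter() - _timers.pop(label)))

class DummyClassifier(object):
    def build_classifier(self, training):
        sum(x * x for x in training)  # pretend to train

start("train classifier")
DummyClassifier().build_classifier(range(10 ** 6))
stop("train classifier")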
Example #5
def runtime_regression(db, output=None, job="xval", **kwargs):
    """
    Plot accuracy of a classifier at predicted runtime.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    colors = sns.color_palette()
    i, actual = 0, []

    for i, classifier in enumerate(db.regression_classifiers):
        basename = ml.classifier_basename(classifier)
        actual, predicted = zip(*sorted([
            row for row in db.execute(
                "SELECT\n"
                "    actual,\n"
                "    predicted\n"
                "FROM runtime_regression_results\n"
                "WHERE job=? AND classifier=?", (job, classifier))
        ],
                                        key=lambda x: x[0],
                                        reverse=True))

        if basename == "ZeroR":
            ax.plot(predicted, label=basename, color=colors[i - 1])
        else:
            ax.scatter(np.arange(len(predicted)),
                       predicted,
                       label=basename,
                       color=colors[i - 1])

    ax.plot(actual, label="Actual", color=colors[i])
    ax.set_yscale("log")
    plt.xlim(0, len(actual))
    plt.legend()
    title = kwargs.pop("title", "Runtime regression for " + job)
    plt.title(title)
    plt.xlabel("Test instances (sorted by descending runtime)")
    plt.ylabel("Runtime (ms, log)")
    viz.finalise(output, **kwargs)
Example #6
def runtime_regression(db, output=None, job="xval", **kwargs):
    """
    Plot accuracy of a classifier at predicted runtime.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    colors = sns.color_palette()
    i, actual = 0, []

    for i,classifier in enumerate(db.regression_classifiers):
        basename = ml.classifier_basename(classifier)
        actual, predicted = zip(*sorted([
            row for row in
            db.execute(
                "SELECT\n"
                "    actual,\n"
                "    predicted\n"
                "FROM runtime_regression_results\n"
                "WHERE job=? AND classifier=?",
                (job, classifier)
            )
        ], key=lambda x: x[0], reverse=True))

        if basename == "ZeroR":
            ax.plot(predicted, label=basename, color=colors[i - 1])
        else:
            ax.scatter(np.arange(len(predicted)), predicted, label=basename,
                       color=colors[i - 1])

    ax.plot(actual, label="Actual", color=colors[i])
    ax.set_yscale("log")
    plt.xlim(0, len(actual))
    plt.legend()
    title = kwargs.pop("title", "Runtime regression for " + job)
    plt.title(title)
    plt.xlabel("Test instances (sorted by descending runtime)")
    plt.ylabel("Runtime (ms, log)")
    viz.finalise(output, **kwargs)
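The zip(*sorted(...)) idiom above sorts the (actual, predicted) pairs by descending actual runtime and then splits them back into two aligned tuples. A toy illustration:

# (actual, predicted) runtime pairs, as returned by the query above.
pairs = [(3.0, 2.5), (10.0, 9.0), (1.0, 1.5)]

# Sort by actual runtime, largest first, then unzip into aligned tuples.
actual, predicted = zip(*sorted(pairs, key=lambda x: x[0], reverse=True))

print(actual)     # (10.0, 3.0, 1.0)
print(predicted)  # (9.0, 2.5, 1.5)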
Example #7
def classifier_speedups(db, classifier, output=None, sort=False,
                        job="xval_classifiers", **kwargs):
    """
    Plot speedup over the baseline of a classifier for each err_fn.
    """
    for err_fn in db.err_fns:
        performances = [row for row in
                        db.execute("SELECT speedup\n"
                                   "FROM classification_results\n"
                                   "WHERE job=? AND classifier=? AND err_fn=?",
                                   (job, classifier, err_fn))]
        if sort: performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=err_fn)

    basename = ml.classifier_basename(classifier)
    plt.title(basename)
    plt.ylabel("Speedup")
    plt.xlabel("Test instances")
    plt.axhline(y=1, color="k")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)
Example #8
def eval_speedup_regressors(db, classifiers, baseline, rank_fn,
                            table, job, training, testing):
    """
    Cross validate speedup regressors: for each scenario, predict the
    speedup of every legal wgsize, rank the predictions, and record the
    best-ranked prediction for which runtime data exists.
    """
    maxwgsize_index = testing.attribute_by_name("kern_max_wg_size").index
    wg_c_index = testing.attribute_by_name("wg_c").index
    wg_r_index = testing.attribute_by_name("wg_r").index
    insert_str = ("INSERT INTO {} VALUES "
                  "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)".format(table))

    for classifier in classifiers:
        meta = Classifier(classifier)
        prof.start("train classifier")
        meta.build_classifier(training)
        prof.stop("train classifier")
        basename = ml.classifier_basename(classifier.classname)
        classifier_id = db.classifier_id(classifier)

        io.debug(job, basename, testing.num_instances)
        scenarios = set([instance.get_string_value(0)
                         for instance in testing])
        instances = zip(scenarios, [
            next(instance for instance in testing
                 if instance.get_string_value(0) == scenario)
            for scenario in scenarios
        ])

        for scenario,instance in instances:
            maxwgsize = int(instance.get_value(maxwgsize_index))
            wlegal = space.enumerate_wlegal_params(maxwgsize)
            predictions = []

            elapsed = 0
            for params in wlegal:
                wg_c, wg_r = unhash_params(params)

                instance.set_value(wg_c_index, wg_c)
                instance.set_value(wg_r_index, wg_r)

                # Predict the speedup for a particular set of
                # parameters.
                prof.start()
                predicted = meta.classify_instance(instance)
                elapsed += prof.elapsed()
                predictions.append((params, predicted))

            # Rank the predictions from highest to lowest speedup.
            predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

            row = db.execute(
                "SELECT "
                "    oracle_param,"
                "    ("
                "        SELECT mean FROM runtime_stats "
                "        WHERE scenario=? AND params=?"
                "    ) * 1.0 / oracle_runtime  AS oracle_speedup,"
                "    worst_runtime / oracle_runtime AS actual_range "
                "FROM scenario_stats "
                "WHERE scenario=?",
                (scenario,baseline,scenario)).fetchone()
            actual = row[:2]

            predicted_range = predictions[-1][1] - predictions[0][1]
            actual_range = row[2] - row[1]

            num_attempts = 1
            while True:
                predicted = predictions.pop(0)

                try:
                    speedup = db.speedup(scenario, baseline, predicted[0])
                    perf = db.perf(scenario, predicted[0])

                    # Speedups over the HE_PARAM and MO_PARAM baselines
                    # may be unavailable for some scenarios.
                    try:
                        speedup_he = db.speedup(scenario, HE_PARAM, predicted[0])
                    except _db.MissingDataError:
                        speedup_he = None

                    try:
                        speedup_mo = db.speedup(scenario, MO_PARAM, predicted[0])
                    except _db.MissingDataError:
                        speedup_mo = None

                    db.execute(insert_str,
                               (job, classifier_id, scenario, actual[0],
                                actual[1], predicted[0], predicted[1],
                                actual_range, predicted_range,
                                num_attempts,
                                1 if predicted[0] == actual[0] else 0,
                                perf, speedup, speedup_he, speedup_mo, elapsed))
                    break

                except _db.MissingDataError:
                    num_attempts += 1

            db.commit()
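The while loop above walks the ranked predictions from best to worst until one has measured runtime data. A self-contained sketch of that fallback pattern, with a plain dict and KeyError standing in for the database and _db.MissingDataError:

# Measured runtimes keyed by params; "4x4" is deliberately missing.
runtimes = {"8x8": 1.0, "16x4": 1.4}

# Predictions ranked from highest to lowest predicted speedup.
predictions = sorted([("4x4", 3.0), ("8x8", 2.0), ("16x4", 1.5)],
                     key=lambda x: x[1], reverse=True)

num_attempts = 1
while True:
    params, predicted_speedup = predictions.pop(0)
    try:
        runtime = runtimes[params]  # raises KeyError if unmeasured
        print(params, predicted_speedup, runtime, num_attempts)
        break
    except KeyError:
        num_attempts += 1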
Example #9
def test_classifier_basename(self):
    self._test("ZeroR", ml.classifier_basename("weka.classifiers."
                                               "rules.ZeroR"))
    self._test("SMO", ml.classifier_basename("weka.classifiers."
                                             "functions.SMO -C 1.0"))
Example #10
def visualise_classification_job(db, job):
    """
    Visualise the results of a classification job.
    """
    basedir = "img/classification/{}/".format(job)

    fs.mkdir(basedir)
    fs.mkdir(basedir + "classifiers")
    fs.mkdir(basedir + "err_fns")

    visualise.err_fn_performance(db, basedir + "err_fns.png", job=job)

    # Bar plot of all results.
    visualise.classification(db, "img/classification/{}.png".format(job),
                             job=job)

    # Per-classifier plots.
    for i,classifier in enumerate(db.classification_classifiers):
        visualise.classifier_speedups(db, classifier,
                                      basedir + "classifiers/{}.png".format(i),
                                      job=job)
    # Per-err_fn plots.
    for err_fn in db.err_fns:
        visualise.err_fn_speedups(db, err_fn,
                                  basedir + "err_fns/{}.png".format(err_fn),
                                  job=job, sort=True)

    # Results table.
    query = db.execute(
        "SELECT classifier,err_fn,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? GROUP BY classifier,err_fn",
        (job,)
    )
    results = []
    for classifier,err_fn,count in query:
        correct, illegal, refused, performance, speedup = zip(*[
            row for row in db.execute(
                "SELECT correct,illegal,refused,performance,speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            )
        ])
        results.append([
            classifier,
            err_fn,
            (sum(correct) / count) * 100,
            (sum(illegal) / count) * 100,
            (sum(refused) / count) * 100,
            min(performance) * 100,
            labmath.geomean(performance) * 100,
            max(performance) * 100,
            min(speedup),
            labmath.geomean(speedup),
            max(speedup)
        ])

    str_args = {
        "float_format": lambda f: "{:.2f}".format(f)
    }

    for i in range(len(results)):
        results[i][0] = ml.classifier_basename(results[i][0])

    columns=(
        "CLASSIFIER",
        "ERR_FN",
        "ACC %",
        "INV %",
        "REF %",
        "Omin %",
        "Oavg %",
        "Omax %",
        "Smin",
        "Savg",
        "Smax",
    )

    latex.table(results, output=fs.path(experiment.TAB_ROOT, job + ".tex"),
                columns=columns, **str_args)
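labmath.geomean above is project-specific; assuming it computes the standard geometric mean, a minimal equivalent is:

import math

def geomean(xs):
    # Geometric mean: the n-th root of the product, computed in log
    # space for numerical stability.
    return math.exp(sum(math.log(x) for x in xs) / len(xs))

print(geomean([1.0, 2.0, 4.0]))  # 2.0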
Example #11
def classification(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    base_err_fn = err_fns[0]
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier",
        (job,base_err_fn)
    )
    results = []

    # Add baseline results.
    baseline = ("4x4")
    correct = db.execute("SELECT Count(*) * 1.0 / 3 FROM classification_results "
                         "WHERE job=? AND actual=?", (job,baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in
        db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?",
            (job, baseline)
        )
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    results.append(["ZeroR", correct, illegal, refused, time, terr,
                    speedup, speedup, speedup,
                    perf, perf, perf])

    # Get results
    for classifier,count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)
        ).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] + speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(*[
        (text.truncate(result[0], 40), result[1], result[2],
         result[3], result[4], result[5])
        for result in results
    ])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _,caps,_ = ax.errorbar(X + .5, time,
                           fmt="none", yerr=terr, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, illegal, width=width,
           color=sns.color_palette("Reds", 1), label="Illegal")
    ax.bar(X + .1 + width, refused, width=width,
           color=sns.color_palette("Oranges", 1), label="Refused")
    ax.bar(X + .1 + 2 * width, correct, width=width,
           color=sns.color_palette("Blues", 1), label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = (.8 / 3)
    colors=sns.color_palette("Greens", len(err_fns))
    for i,err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), speedups, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _,caps,_ = ax.errorbar(X + .1 + (i + .5) * width, speedups,
                               fmt="none", yerr=yerrs, capsize=3, ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # PERFORMANCE
    colors=sns.color_palette("Blues", len(err_fns))
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 4)
    for i,err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), perfs, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _,caps,_ = ax.errorbar(X + .1 + (i + .5) * width, perfs,
                               fmt="none", yerr=yerrs, capsize=3, ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")


    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
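Each subplot above uses the same bar-plus-separate-errorbar pattern. A minimal standalone version with synthetic data:

import numpy as np
import matplotlib.pyplot as plt

X = np.arange(3)
width = .8 / 2
ax = plt.subplot(111)
series = [([2.0, 3.0, 1.0], [.2, .3, .1]),
          ([1.0, 2.0, 2.0], [.1, .2, .2])]
for i, (vals, errs) in enumerate(series):
    ax.bar(X + .1 + i * width, vals, width=width, label="err_fn {}".format(i))
    # Confidence intervals are drawn as a separate errorbar call so the
    # cap colour and width can be styled independently of the bars.
    _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, vals, fmt="none",
                             yerr=errs, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color("k")
        cap.set_markeredgewidth(1)
ax.set_xticks(X + .5)
ax.set_xticklabels(["A", "B", "C"])
ax.legend()
plt.show()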
Example #12
def speedup_classification(db, output=None, job="xval", **kwargs):
    """
    Plot performance of classification using speedup regression.
    """
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM speedup_classification_results\n"
        "WHERE job=? GROUP BY classifier", (job,)
    )
    results = []
    for classifier,count in query:
        basename = ml.classifier_basename(classifier)
        correct = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100\n"
            "FROM speedup_classification_results\n"
            "WHERE job=? AND classifier=?",
            (count, job, classifier)
        ).fetchone()[0]
        # Get a list of mean speedups for each err_fn.
        speedups = [
            row for row in
            db.execute(
                "SELECT\n"
                "    AVG(speedup) * 100,\n"
                "    CONFERROR(speedup, .95) * 100,\n"
                "    AVG(performance) * 100,\n"
                "    CONFERROR(performance, .95) * 100\n"
                "FROM speedup_classification_results\n"
                "WHERE job=? AND classifier=?",
                (job, classifier)
            ).fetchone()
        ]

        results.append([basename, correct] + speedups)

    # Zip into lists.
    labels, correct, speedups, yerrs, perfs, perf_yerrs = zip(*results)

    X = np.arange(len(labels))
    # Bar width.
    width = (.8 / (len(results[0]) - 1))

    plt.bar(X + width, correct, width=width,
            color=sns.color_palette("Blues", 1), label="Accuracy")
    plt.bar(X + 2 * width, speedups, width=width,
            color=sns.color_palette("Greens", 1), label="Speedup")
    plt.bar(X + 3 * width, perfs, width=width,
            color=sns.color_palette("Oranges", 1), label="Performance")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _,caps,_ = plt.errorbar(X + 2.5 * width, speedups, fmt="none",
                            yerr=yerrs, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)
    _,caps,_ = plt.errorbar(X + 3.5 * width, perfs, fmt="none",
                            yerr=perf_yerrs, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, labels)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%' ))

    title = kwargs.pop("title",
                       "Classification results for " + job +
                       " using speedup regression")
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
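viz.finalise presumably forwards additional_artists and bbox_inches to plt.savefig, as in the linked post. Stripped to plain matplotlib, the legend-beneath-plot trick looks like this (the output filename is hypothetical):

import matplotlib.pyplot as plt

plt.plot([1, 2, 3], label="series")

# Place the legend below the axes, then include it in the saved figure's
# bounding box via bbox_extra_artists.
art = plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)
plt.savefig("plot.png", bbox_extra_artists=[art], bbox_inches="tight")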
Example #13
def classification(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    base_err_fn = err_fns[0]
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier", (job, base_err_fn))
    results = []

    # Add baseline results.
    baseline = ("4x4")
    correct = db.execute(
        "SELECT Count(*) * 1.0 / 3 FROM classification_results "
        "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?", (job, baseline))
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    results.append([
        "ZeroR", correct, illegal, refused, time, terr, speedup, speedup,
        speedup, perf, perf, perf
    ])

    # Get results
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone() for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone() for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] +
                       speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(
        *[(text.truncate(result[0], 40), result[1], result[2], result[3],
           result[4], result[5]) for result in results])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5,
                             time,
                             fmt="none",
                             yerr=terr,
                             capsize=3,
                             ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1,
           illegal,
           width=width,
           color=sns.color_palette("Reds", 1),
           label="Illegal")
    ax.bar(X + .1 + width,
           refused,
           width=width,
           color=sns.color_palette("Oranges", 1),
           label="Refused")
    ax.bar(X + .1 + 2 * width,
           correct,
           width=width,
           color=sns.color_palette("Blues", 1),
           label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = (.8 / 3)
    colors = sns.color_palette("Greens", len(err_fns))
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width),
               speedups,
               width=width,
               label=errfn2label(err_fn),
               color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width,
                                 speedups,
                                 fmt="none",
                                 yerr=yerrs,
                                 capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # PERFORMANCE
    colors = sns.color_palette("Blues", len(err_fns))
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width),
               perfs,
               width=width,
               label=errfn2label(err_fn),
               color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width,
                                 perfs,
                                 fmt="none",
                                 yerr=yerrs,
                                 capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")

    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
Example #14
def speedup_classification(db, output=None, job="xval", **kwargs):
    """
    Plot performance of classification using speedup regression.
    """
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM speedup_classification_results\n"
        "WHERE job=? GROUP BY classifier", (job, ))
    results = []
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100\n"
            "FROM speedup_classification_results\n"
            "WHERE job=? AND classifier=?",
            (count, job, classifier)).fetchone()[0]
        # Get a list of mean speedups for each err_fn.
        speedups = [
            row for row in db.execute(
                "SELECT\n"
                "    AVG(speedup) * 100,\n"
                "    CONFERROR(speedup, .95) * 100,\n"
                "    AVG(performance) * 100,\n"
                "    CONFERROR(performance, .95) * 100\n"
                "FROM speedup_classification_results\n"
                "WHERE job=? AND classifier=?", (job, classifier)).fetchone()
        ]

        results.append([basename, correct] + speedups)

    # Zip into lists.
    labels, correct, speedups, yerrs, perfs, perf_yerrs = zip(*results)

    X = np.arange(len(labels))
    # Bar width.
    width = (.8 / (len(results[0]) - 1))

    plt.bar(X + width,
            correct,
            width=width,
            color=sns.color_palette("Blues", 1),
            label="Accuracy")
    plt.bar(X + 2 * width,
            speedups,
            width=width,
            color=sns.color_palette("Greens", 1),
            label="Speedup")
    plt.bar(X + 3 * width,
            perfs,
            width=width,
            color=sns.color_palette("Oranges", 1),
            label="Performance")
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = plt.errorbar(X + 2.5 * width,
                              speedups,
                              fmt="none",
                              yerr=yerrs,
                              capsize=3,
                              ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)
    _, caps, _ = plt.errorbar(X + 3.5 * width,
                              perfs,
                              fmt="none",
                              yerr=perf_yerrs,
                              capsize=3,
                              ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, labels)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    title = kwargs.pop(
        "title",
        "Classification results for " + job + " using speedup regression")
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
Example #15
def visualise_classification_job(db, job):
    """
    Visualise the results of a classification job.
    """
    basedir = "img/classification/{}/".format(job)

    fs.mkdir(basedir)
    fs.mkdir(basedir + "classifiers")
    fs.mkdir(basedir + "err_fns")

    visualise.err_fn_performance(db, basedir + "err_fns.png", job=job)

    # Bar plot of all results.
    visualise.classification(db,
                             "img/classification/{}.png".format(job),
                             job=job)

    # Per-classifier plots.
    for i, classifier in enumerate(db.classification_classifiers):
        visualise.classifier_speedups(db,
                                      classifier,
                                      basedir + "classifiers/{}.png".format(i),
                                      job=job)
    # Per-err_fn plots.
    for err_fn in db.err_fns:
        visualise.err_fn_speedups(db,
                                  err_fn,
                                  basedir + "err_fns/{}.png".format(err_fn),
                                  job=job,
                                  sort=True)

    # Results table.
    query = db.execute(
        "SELECT classifier,err_fn,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? GROUP BY classifier,err_fn", (job, ))
    results = []
    for classifier, err_fn, count in query:
        correct, illegal, refused, performance, speedup = zip(*[
            row for row in db.execute(
                "SELECT correct,illegal,refused,performance,speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?", (job, classifier,
                                                              err_fn))
        ])
        results.append([
            classifier, err_fn, (sum(correct) / count) * 100,
            (sum(illegal) / count) * 100, (sum(refused) / count) * 100,
            min(performance) * 100,
            labmath.geomean(performance) * 100,
            max(performance) * 100,
            min(speedup),
            labmath.geomean(speedup),
            max(speedup)
        ])

    str_args = {"float_format": lambda f: "{:.2f}".format(f)}

    for i in range(len(results)):
        results[i][0] = ml.classifier_basename(results[i][0])

    columns = (
        "CLASSIFIER",
        "ERR_FN",
        "ACC %",
        "INV %",
        "REF %",
        "Omin %",
        "Oavg %",
        "Omax %",
        "Smin",
        "Savg",
        "Smax",
    )

    latex.table(results,
                output=fs.path(experiment.TAB_ROOT, job + ".tex"),
                columns=columns,
                **str_args)