Example #1
def find_regressions(kernels, test_runs, metric):
    # A test is regressed on some platform if its latest results are
    # definitely lower than those on the reference kernel.
    # Runs for the latest kernel may be underway and incomplete;
    # in that case, fall back to the next-latest kernel per platform.
    # TODO: this next-latest fallback misbehaves if the latest run is
    #       not sorted last, or if there are several development threads
    ref = kernels[0]
    latest = kernels[-1]
    prev = kernels[-2:][0]  # next-latest kernel; ref itself if only one kernel
    scores = {}  # kernel --> (platform --> list of perf scores)
    for k in [ref, prev, latest]:
        if k in test_runs:
            scores[k] = collect_raw_scores(test_runs[k], metric)
    regressed_platforms = []
    for platform in scores[ref]:
        if latest in scores and platform in scores[latest]:
            k = latest
        elif prev in scores and platform in scores[prev]:
            k = prev
        else:  # perhaps due to decay of test machines
            k = ref  # no regression info avail
        ref_avg, ref_std = plotgraph.avg_dev(scores[ref][platform])
        avg, std = plotgraph.avg_dev(scores[k][platform])
        if avg + std < ref_avg - ref_std:
            regressed_platforms.append(platform)
    return sorted(regressed_platforms)
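
The test above flags a platform only when the one-sigma intervals around the
two averages fail to overlap. A self-contained sketch of that comparison rule,
using hypothetical score lists and a stand-in for plotgraph.avg_dev:

import math

def avg_dev(values):
    # mean and population standard deviation of a list of scores
    avg = sum(values) / float(len(values))
    var = sum((v - avg) ** 2 for v in values) / float(len(values))
    return avg, math.sqrt(var)

ref_scores = [100.0, 102.0, 98.0]   # hypothetical reference-kernel runs
new_scores = [90.0, 91.0, 89.0]     # hypothetical latest-kernel runs
ref_avg, ref_std = avg_dev(ref_scores)
avg, std = avg_dev(new_scores)
regressed = avg + std < ref_avg - ref_std  # True: intervals do not overlap
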
Example #2
    def analyze_variants_all_tests_1_platform(self, platform, vary):
        # generate one graph image for the results of all benchmarks
        # on one platform and one kernel, comparing the effects of
        # two or more combos of kernel options (test run attributes,
        #   e.g. numa_fake, stale_page, kswapd_merge, sched_idle)
        kernel = self.cgiform.getvalue('kernel', 'some_kernel')
        self.passthru.append('kernel=%s' % kernel)

        # two or more vary_groups, one for each plotted line;
        # each group begins with vary= and ends at the next '&',
        # and holds a comma-separated list of test attribute key=val pairs,
        #    e.g.  vary=keyval1,keyval2&vary=keyval3,keyval4
        vary_groups = [
            dict(pair.split('=', 1) for pair in vary_group.split(','))
            for vary_group in vary
        ]

        test = self.benchmarks[0]  # pick any test in all jobs
        kernels, test_tag = self.jobs_selector(test, self.job_table,
                                               self.kernel_dates)

        linekeys = {}
        plot_data = {}
        baselines = {}
        for i, vary_group in enumerate(vary_groups):
            group_attributes = self.test_attributes.copy()
            group_attributes.update(vary_group)
            linekey = ','.join('%s=%s' % (attr, vary_group[attr])
                               for attr in vary_group)
            linekeys[i] = linekey
            data = {}
            for benchmark in self.benchmarks:
                metric = perf.benchmark_main_metric(benchmark)
                runs = collect_testruns(self.job_table[kernel],
                                        benchmark + test_tag, group_attributes,
                                        self.platforms_filter, 'by_hosts'
                                        in self.toggles, self.no_antag)
                vals = []
                for testrunx in runs[platform]:
                    vals += perf.get_metric_at_point([testrunx], metric)
                if vals:
                    if benchmark not in baselines:
                        baselines[benchmark], stddev = plotgraph.avg_dev(vals)
                    vals = [val / baselines[benchmark] for val in vals]
                    data[benchmark] = vals
            plot_data[i] = data

        title = "%s on %s" % (kernel, platform)
        for attr in self.test_attributes:
            title += ', %s=%s' % (attr, self.test_attributes[attr])
        if 'table' in self.cgiform:
            self.table_for_variants_all_tests(title,
                                              plot_data,
                                              linekeys,
                                              range(len(linekeys)),
                                              filtered_passthru=self.passthru,
                                              test_tag=test_tag)
        else:
            graph_variants_all_tests(title, plot_data, linekeys, self.size,
                                     'dark' in self.toggles)
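
The vary-group parsing above turns each repeated vary= CGI value into a dict of
attribute overrides, one dict per plotted line. A minimal sketch with
hypothetical attribute names:

# 'vary' as it would arrive from a query string such as
#   vary=numa_fake=0,sched_idle=1&vary=numa_fake=1,sched_idle=0
vary = ['numa_fake=0,sched_idle=1', 'numa_fake=1,sched_idle=0']
vary_groups = [
    dict(pair.split('=', 1) for pair in vary_group.split(','))
    for vary_group in vary
]
# vary_groups == [{'numa_fake': '0', 'sched_idle': '1'},
#                 {'numa_fake': '1', 'sched_idle': '0'}]
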
Example #3
def table_variants_all_tests(plot_data, columns, colkeys, benchmarks, myurl,
                             filtered_passthru):
    # generate html table of graph's numbers
    #   for primary metric over all benchmarks (rows),
    #   on one platform and one kernel,
    #   over various combos of test run attribute constraints (cols).
    print "<table border=1 cellpadding=3 cellspacing=0>"
    print "<tr> <td><b> Benchmark </b></td>",
    for col in columns:
        print "<td><b>", colkeys[col].replace(',', ',<br>'), "</b></td>"
    print "</tr>"
    for benchmark in benchmarks:
        print "<tr> <td><b>", benchmark, "</b></td>"
        for col in columns:
            print "<td>",
            vals = plot_data[col].get(benchmark, [])
            if not vals:
                print "?",
            else:
                (avg, std_dev) = plotgraph.avg_dev(vals)
                args = filtered_passthru[:]
                perf.append_cgi_args(args, {'test': benchmark})
                for keyval in colkeys[col].split(','):
                    key, val = keyval.split('=', 1)
                    perf.append_cgi_args(args, {key: val})
                print "<a href='%s?%s&runs&attrs'>" % (myurl, '&'.join(args))
                print "<b>%.4g</b>" % avg, "</a><br>",
                print "&nbsp; <small> %dr   </small>" % len(vals),
                print "&nbsp; <small> %.3g </small>" % std_dev,
            print "</td>"
        print "</tr>\n"
    print "</table>"
    print "<p> <b>Bold value:</b> Average of this metric, then <br>"
    print "number of good test runs, then standard deviation of those runs"
Example #4
def table_1_metric_all_kernels(plot_data, columns, column_argname, kernels,
                               kernel_dates, myurl, filtered_passthru):
    # generate html table of graph's numbers
    #   for 1 benchmark metric over all kernels (rows),
    #   over various platforms or various antagonists etc (cols).
    ref_thresholds = {}
    print "<table border=1 cellpadding=3 cellspacing=0>"
    print "<tr> <td><b> Kernel </b></td>",
    for label in columns:
        if not label and column_argname == 'antag':
            label = 'no antag'
        print "<td><b>", label.replace('_', '<br>_'), "</b></td>"
    print "</tr>"
    for kernel in kernels:
        print "<tr> <td><b>", kernel, "</b>",
        if kernel in kernel_dates:
            print "<br><small>", kernel_dates[kernel], "</small>"
        print "</td>"
        for col in columns:
            print "<td",
            vals = plot_data[col].get(kernel, [])
            if not vals:
                print "> ?",
            else:
                (avg, std_dev) = plotgraph.avg_dev(vals)
                if col not in ref_thresholds:
                    ref_thresholds[col] = avg - std_dev
                if avg + std_dev < ref_thresholds[col]:
                    print "bgcolor=pink",
                print "> ",
                args = filtered_passthru[:]
                perf.append_cgi_args(args, {
                    column_argname: col,
                    'kernel': kernel
                })
                print "<a href='%s?%s&runs&attrs'>" % (myurl, '&'.join(args))
                print "<b>%.4g</b>" % avg, "</a><br>",
                print "&nbsp; <small> %dr   </small>" % len(vals),
                print "&nbsp; <small> %.3g </small>" % std_dev,
            print "</td>"
        print "</tr>\n"
    print "</table>"
    print "<p> <b>Bold value:</b> Average of this metric, then <br>"
    print "number of good test runs, then standard deviation of those runs"
    print "<br> Pink if regressed from reference kernel"
Example #5
def collect_scaled_scores(metric, test_runs, regressed_platforms, relative):
    # get scores of test runs for 1 test on some kernels and platforms;
    # optionally make them relative to the first kernel (in sorted order,
    # nominally the oldest) seen on that platform;
    # arrange by plotline (i.e. platform) for gnuplot
    plot_data = {}  # platform --> (kernel --> list of perf scores)
    baseline = {}
    for kernel in sorted(test_runs.keys()):
        for platform in test_runs[kernel]:
            if not (regressed_platforms is None
                    or platform in regressed_platforms):
                continue  # delete results for uninteresting platforms
            vals = perf.get_metric_at_point(test_runs[kernel][platform],
                                            metric)
            if vals:
                if relative:
                    if platform not in baseline:
                        baseline[platform], std = plotgraph.avg_dev(vals)
                    vals = [v / baseline[platform] for v in vals]
                pdp = plot_data.setdefault(platform, {})
                pdp.setdefault(kernel, []).extend(vals)
    return plot_data
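
The relative mode above normalizes every platform's scores by that platform's
first-kernel average, so each plotline starts near 1.0. A minimal sketch with
hypothetical runs:

baseline = {}
plot_data = {}
runs = [('kernel1', 'plat_a', [10.0, 10.0]),   # hypothetical (kernel, platform,
        ('kernel2', 'plat_a', [12.0])]         #  scores) tuples, kernel-sorted
for kernel, platform, vals in runs:
    if platform not in baseline:
        baseline[platform] = sum(vals) / float(len(vals))
    vals = [v / baseline[platform] for v in vals]
    plot_data.setdefault(platform, {}).setdefault(kernel, []).extend(vals)
# plot_data == {'plat_a': {'kernel1': [1.0, 1.0], 'kernel2': [1.2]}}
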
Example #6
def table_all_metrics_1_platform(test_runs, platform, relative):
    # TODO: show std dev in cells
    #       can't mark regressions, since some metrics improve downwards
    kernels = perf.sort_kernels(test_runs.keys())
    scores = {}
    attrs = set()
    for kernel in kernels:
        testruns = test_runs[kernel].get(platform, [])
        if testruns:
            d = perf.collect_all_metrics_scores(testruns)
            scores[kernel] = d
            attrs.update(set(d.keys()))
        else:
            print "No runs completed on", kernel, "<br>"
    attrs = sorted(list(attrs))[:100]  # cap the table at 100 metrics

    print "<table border=1 cellpadding=4 cellspacing=0>"
    print "<tr><td> Metric </td>"
    for kernel in kernels:
        kernel = kernel.replace("_", "_<br>")
        print "<td>", kernel, "</td>"
    print "</tr>"
    for attr in attrs:
        print "<tr>"
        print "<td>", attr, "</td>"
        baseline = None
        for kernel in kernels:
            print "<td>",
            if kernel in scores and attr in scores[kernel]:
                (avg, dev) = plotgraph.avg_dev(scores[kernel][attr])
                if baseline and relative:
                    percent = (avg / baseline - 1) * 100
                    print "%+.1f%%" % percent,
                else:
                    baseline = avg
                    print "%.4g" % avg,
            else:
                print "?"
            print "</td>"
        print "</tr>"
    print "</table>"