Example #1
def samples_distribution(data) -> None:
    freqd = {}
    for dp in data:
        gen, sam = dp.generation_id, dp.sample
        hsm = crypto.sha256_str(sam)
        if gen in freqd:
            if hsm in freqd[gen]:
                freqd[gen][hsm] += 1
            else:
                freqd[gen][hsm] = 1
        else:
            freqd[gen] = {}
            freqd[gen][hsm] = 1
    for k, v in freqd.items():
        gdict = {}
        for samp, freq in v.items():
            if freq in gdict:
                gdict[freq] += 1
            else:
                gdict[freq] = 1
        freqd[k] = (list(gdict.keys()), list(gdict.values()))
    plt.GrouppedBars(
        groups=freqd,  # Dict[str, Tuple[List, List]]
        plot_name="freq_samples_per_gen",
        path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
        title="Repetition of samples per generation",
        x_name="# of repetitions",
    )

    return
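
A note on the shared plotting structure: every example in this listing reduces its counts to the same shape before calling the project's GrouppedBars plotter, namely a dict mapping a group label to a pair of parallel sequences (x-axis keys, y-axis values). The sketch below shows that shape on toy data; the keyword arguments are assumed from the calls in these examples rather than from the plotter's documentation, and plt here refers to the project's plotter module, not matplotlib.

# Minimal sketch (assumed API, inferred from the calls in these examples):
# each group label maps to (x_keys, y_values); labels become the legend entries.
import pathlib

toy_groups = {
    "generation 0": ([1, 2, 3], [120, 30, 5]),  # 120 samples seen once, 30 twice, 5 three times
    "generation 1": ([1, 2], [200, 12]),
}
plt.GrouppedBars(
    groups=toy_groups,
    plot_name="toy_freq_samples_per_gen",
    path=pathlib.Path(".").absolute(),
    title="Repetition of samples per generation (toy data)",
    x_name="# of repetitions",
)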
Example #2
def input_samples_distribution(data) -> None:
    # 1) Frequency per generation.
    #   x-axis: times occurred, y-axis: how many samples hit this frequency.
    #   One group of these distributions per generation.
    freqd = {}
    for dp in data:
        gen, f = dp.generation_id, dp.frequency
        if gen in freqd:
            if f in freqd[gen]:
                freqd[gen][f] += 1
            else:
                freqd[gen][f] = 1
        else:
            freqd[gen] = {}
            freqd[gen][f] = 1
    for k, v in freqd.items():
        freqd[k] = (list(v.keys()), list(v.values()))

    plt.GrouppedBars(
        groups=freqd,  # Dict[str, Tuple[List, List]]
        plot_name="freq_input_samples_per_gen",
        path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
        title="Repetition of input/samples pair per generation",
        x_name="# of repetitions",
    )
    return
Example #3
def score_direction_categorical_distrib_per_rel_hlen(data) -> None:
    """
  Grouped bars of a) better, b) same, c) worse score, per relative hole length unit, normalized within each score category.
  """
    abshl = []
    score_ds = []
    groups = {
        'better score': {},
        'worse score': {},
        'same score': {},
    }
    normalizers = {
        'better score': 0,
        'worse score': 0,
        'same score': 0,
    }
    max_abs = 0
    for dp in data:
        try:
            rhl = dp.rel_hole_lengths
            rounded = int(100 * float(rhl))
            max_abs = max(max_abs, rounded)
            sd = dp.score_delta
            if not math.isinf(sd):
                if sd > 0:
                    k = 'worse score'
                    normalizers[k] += 1
                elif sd < 0:
                    k = 'better score'
                    normalizers[k] += 1
                else:
                    k = 'same score'
                    normalizers[k] += 1
                if str(rounded) not in groups[k]:
                    groups[k][str(rounded)] = 1
                else:
                    groups[k][str(rounded)] += 1
        except Exception as e:
            continue
    for k, v in groups.items():
        for rhlk, rhlv in v.items():
            groups[k][rhlk] = groups[k][rhlk] / normalizers[k]

    for k, v in groups.items():
        groups[k] = (list(v.keys()), list(v.values()))

    plt.GrouppedBars(
        groups=groups,  # Dict[str, Tuple[List, List]]
        plot_name="score_cat_distrib_rel_hlen",
        path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
        title=
        "Score Direction Distribution per Category VS Relative Hole Length",
        x_name="Size of Hole %",
    )
Example #4
def KAverageScore(**kwargs) -> None:
  """
  Compare the average distance of the top-K closest samples to each target benchmark
  across all database groups.
  """
  db_groups      = kwargs.get('db_groups')
  target         = kwargs.get('targets')
  feature_space  = kwargs.get('feature_space')
  top_k          = kwargs.get('top_k')
  unique_code    = kwargs.get('unique_code', False)
  plot_config    = kwargs.get('plot_config')
  workspace_path = kwargs.get('workspace_path')
  groups = {}

  benchmarks = target.get_benchmarks(feature_space)
  target_origin_dists = {}
  for dbg in db_groups:
    if not (dbg.db_type == samples_database.SamplesDatabase or dbg.db_type == encoded.EncodedContentFiles or dbg.db_type == clsmith.CLSmithDatabase):
      raise ValueError("Scores require SamplesDatabase or EncodedContentFiles but received", dbg.db_type)
    groups[dbg.group_name] = ([], [])
    for benchmark in tqdm.tqdm(benchmarks, total = len(benchmarks), desc = "Benchmarks"):
      groups[dbg.group_name][0].append(benchmark.name)
      # Find shortest distances.
      if unique_code:
        get_data = lambda x: dbg.get_unique_data_features(x)
      else:
        get_data = lambda x: dbg.get_data_features(x)

      distances = workers.SortedDistances(get_data(feature_space), benchmark.features, feature_space)
      # Compute target's distance from O(0,0)
      assert len(distances) != 0, "Sorted src list for {} is empty!".format(dbg.group_name)
      avg_dist = sum(distances[:top_k]) / top_k
      if benchmark.name in target_origin_dists:
        target_origin_dists[benchmark.name] = max(target_origin_dists[benchmark.name], avg_dist)
      else:
        target_origin_dists[benchmark.name] = max(math.sqrt(sum([x**2 for x in benchmark.features.values()])), avg_dist)

      # groups[dbg.group_name][1].append(100 * ((target_origin_dist - avg_dist) / target_origin_dist))
      groups[dbg.group_name][1].append(avg_dist)

  for group_name, tup in groups.items():
    bench_names, raw_dists = tup
    for idx, (bench_name, raw_dist) in enumerate(zip(bench_names, raw_dists)):
      groups[group_name][1][idx] = 100 * ( (target_origin_dists[bench_name] - raw_dist ) / target_origin_dists[bench_name])

  plotter.GrouppedBars(
    groups = groups,
    plot_name = "avg_{}_dist_{}_{}".format(top_k, feature_space.replace("Features", " Features"), '-'.join([dbg.group_name for dbg in db_groups])),
    title = "{}".format(feature_space.replace("Features", " Features")),
    path = workspace_path,
    **plot_config if plot_config else {},
  )
  return
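
The second pass of KAverageScore turns raw distances into a relative score: with d_origin the larger of the target benchmark's distance from the feature-space origin and the largest average top-k distance seen for that benchmark, and d_avg a group's average top-k distance, the reported value is 100 * (d_origin - d_avg) / d_origin, i.e. how much of the gap to the target the group's closest samples cover. A quick hand check of that arithmetic, with made-up numbers:

# Made-up numbers, only to illustrate the normalization used in KAverageScore.
import math

features = {"comp": 3.0, "mem": 4.0}                        # hypothetical target features
d_origin = math.sqrt(sum(x**2 for x in features.values()))  # 5.0, distance from O(0,0)
d_avg = 1.0                                                  # hypothetical average top-k distance

relative_proximity = 100 * (d_origin - d_avg) / d_origin
print(relative_proximity)  # 80.0 -> the group covers 80% of the distance to the target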
Example #5
def score_vs_token_delta(data) -> None:
    # 5) Bar plot: 9 combinations of the sign of the token delta and the score delta (negative, positive, zero).
    groups = {
        'better score': [['token delta > 0', 'token delta < 0', 'token delta == 0'], [0, 0, 0]],
        'worse score': [['token delta > 0', 'token delta < 0', 'token delta == 0'], [0, 0, 0]],
        'same score': [['token delta > 0', 'token delta < 0', 'token delta == 0'], [0, 0, 0]],
    }
    nsum = 0
    for dp in data:
        td = dp.hole_ind_length - sum(
            [int(x) for x in dp.abs_hole_lengths.split(',') if x])
        sd = dp.score_delta if not math.isinf(dp.score_delta) else None
        if sd is not None and td is not None:
            nsum += 1
            if sd < 0:
                if td > 0:
                    groups['better score'][1][0] += 1
                elif td < 0:
                    groups['better score'][1][1] += 1
                else:
                    groups['better score'][1][2] += 1
            elif sd > 0:
                if td > 0:
                    groups['worse score'][1][0] += 1
                elif td < 0:
                    groups['worse score'][1][1] += 1
                else:
                    groups['worse score'][1][2] += 1
            else:
                if td > 0:
                    groups['same score'][1][0] += 1
                elif td < 0:
                    groups['same score'][1][1] += 1
                else:
                    groups['same score'][1][2] += 1
    for k, v in groups.items():
        for idx, nv in enumerate(v[1]):
            groups[k][1][idx] = 100 * (nv / nsum)
    plt.GrouppedBars(
        groups=groups,
        plot_name="token_score_deltas",
        path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
        title="Sample Frequency % VS token & score delta",
        x_name="category",
    )
    return
Example #6
def score_per_rel_hlen(data) -> None:
    """
  Grouped bars of a) better, b) same, c) worse score, per relative hole length unit, normalized to percentages within each hole-length bucket.
  """
    abshl = []
    score_ds = []
    groups = {
        'better score': {},
        'worse score': {},
        'same score': {},
    }
    max_abs = 0
    for dp in data:
        try:
            rhl = dp.rel_hole_lengths
            rounded = int(100 * float(rhl))
            max_abs = max(max_abs, rounded)
            sd = dp.score_delta
            if not math.isinf(sd):
                if sd > 0:
                    k = 'worse score'
                elif sd < 0:
                    k = 'better score'
                else:
                    k = 'same score'
                if str(rounded) not in groups[k]:
                    groups[k][str(rounded)] = 1
                else:
                    groups[k][str(rounded)] += 1
        except Exception as e:
            continue
    for l in range(0, max_abs + 1):
        total = 0
        for k, v in groups.items():
            if str(l) in v:
                total += v[str(l)]
        for k, v in groups.items():
            if str(l) in v:
                groups[k][str(l)] = 100 * (v[str(l)] / total)
    for k, v in groups.items():
        groups[k] = (list(v.keys()), list(v.values()))

    plt.GrouppedBars(
        groups=groups,  # Dict[str, Tuple[List, List]]
        plot_name="score_per_rel_hlen",
        path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
        title="Score Direction (%) per Relative Hole Length",
        x_name="Size of Hole %",
    )
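
Unlike score_direction_categorical_distrib_per_rel_hlen above, which normalizes each category by its own total, score_per_rel_hlen (and comp_vs_num_tokens further down) normalizes within each x-axis bucket, so that for a given hole length the better/worse/same bars sum to 100%. A toy run of that per-bucket normalization:

# Toy counts, only to illustrate the per-bucket normalization used above:
# within each hole-length bucket, the categories are scaled to sum to 100%.
groups = {
    "better score": {"10": 6, "20": 1},
    "worse score": {"10": 3},
    "same score": {"10": 1, "20": 3},
}
for bucket in ("10", "20"):
    total = sum(v.get(bucket, 0) for v in groups.values())
    for v in groups.values():
        if bucket in v:
            v[bucket] = 100 * v[bucket] / total
print(groups["better score"])  # {'10': 60.0, '20': 25.0}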
Example #7
def comp_vs_len_indices_over_len_input(data) -> None:
    """
  Grouped bars of compiling vs non-compiling samples, per ratio of hole-indices length over input length.
  """
    abshl = []
    score_ds = []
    groups = {
        'compile': {},
        'not-compile': {},
    }
    max_len_ind = 0.0
    for dp in data:
        try:
            len_ratio = dp.hole_ind_length / len(
                [int(x) for x in dp.encoded_input_ids.split(',') if x])
            len_ratio = round(len_ratio, 1)
            max_len_ind = max(max_len_ind, len_ratio)
            cs = dp.compile_status
            if cs == 1:
                k = 'compile'
            else:
                k = 'not-compile'
            if str(len_ratio) not in groups[k]:
                groups[k][str(len_ratio)] = 1
            else:
                groups[k][str(len_ratio)] += 1
        except Exception as e:
            logger.error(e)
            continue
    # for l in range(0, max_len_ind):
    #   total = 0
    #   for k, v in groups.items():
    #     if str(l) in v:
    #       total += v[str(l)]
    # for k, v in groups.items():
    #   if str(l) in v:
    #     groups[k][str(l)] = 100 * (v[str(l)] / total)
    for k, v in groups.items():
        groups[k] = (list(v.keys()), list(v.values()))

    plt.GrouppedBars(
        groups=groups,  # Dict[str, Tuple[List, List]]
        plot_name="comp_per_indices_input_len_ratio",
        path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
        title="Compilability VS (Length of Indices / Length of Input)",
        x_name="Length of Indices / Length of Input",
    )
    return
Example #8
def comp_vs_num_tokens(data) -> None:
    """
  Grouped bars of compiling vs non-compiling samples, per number of tokens in the sample.
  """
    abshl = []
    score_ds = []
    groups = {
        'compile': {},
        'not-compile': {},
    }
    max_numtok = 0
    for dp in data:
        try:
            numtok = dp.num_tokens
            max_numtok = max(max_numtok, numtok)
            cs = dp.compile_status
            if cs == 1:
                k = 'compile'
            else:
                k = 'not-compile'
            if str(numtok) not in groups[k]:
                groups[k][str(numtok)] = 1
            else:
                groups[k][str(numtok)] += 1
        except Exception as e:
            logger.error(e)
            continue
    for l in range(0, max_numtok + 1):
        total = 0
        for k, v in groups.items():
            if str(l) in v:
                total += v[str(l)]
        for k, v in groups.items():
            if str(l) in v:
                groups[k][str(l)] = 100 * (v[str(l)] / total)
    for k, v in groups.items():
        groups[k] = (list(v.keys()), list(v.values()))

    plt.GrouppedBars(
        groups=groups,  # Dict[str, Tuple[List, List]]
        plot_name="comp_per_len_sample",
        path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
        title="Compilability % VS Length of Sample",
        x_name="Length of Sample",
    )
    return
Example #9
def comp_vs_token_delta(data) -> None:
    # 6) Bar plot: 6 combinations of compilability and the sign of the token delta.
    groups = {
        'token delta > 0': [['compile', 'not-compile'], [0, 0]],
        'token delta < 0': [['compile', 'not-compile'], [0, 0]],
        'token delta == 0': [['compile', 'not-compile'], [0, 0]],
    }
    nsum = 0
    for dp in data:
        td = dp.hole_ind_length - sum(
            [int(x) for x in dp.abs_hole_lengths.split(',') if x])
        cs = dp.compile_status
        if td is not None and cs is not None:
            nsum += 1
            if td > 0:
                if cs == 1:
                    groups['token delta > 0'][1][0] += 1
                else:
                    groups['token delta > 0'][1][1] += 1
            elif td < 0:
                if cs == 1:
                    groups['token delta < 0'][1][0] += 1
                else:
                    groups['token delta < 0'][1][1] += 1
            else:
                if cs == 1:
                    groups['token delta == 0'][1][0] += 1
                else:
                    groups['token delta == 0'][1][1] += 1
    for k, v in groups.items():
        for idx, nv in enumerate(v[1]):
            groups[k][1][idx] = 100 * (nv / nsum)

    plt.GrouppedBars(
        groups=groups,
        plot_name="comp_token_delta",
        path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
        title="Sample Frequency % VS Compilability & token delta",
        x_name="category",
    )
    return
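
Both score_vs_token_delta above and comp_vs_token_delta here derive the token delta the same way: hole_ind_length (presumably the number of tokens generated to fill the hole) minus the total number of tokens the hole removed (abs_hole_lengths, stored as a comma-separated string), so a positive delta means the infill is longer than the span it replaced. A small standalone check of that arithmetic, with made-up field values:

# Made-up datapoint fields, only to illustrate the token-delta arithmetic above.
hole_ind_length = 12       # tokens generated to fill the hole (assumed meaning)
abs_hole_lengths = "4,3,"  # tokens removed per hole, comma-separated

removed = sum(int(x) for x in abs_hole_lengths.split(",") if x)  # 7
token_delta = hole_ind_length - removed                          # 5 -> counted under "token delta > 0"
print(token_delta)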
Example #10
def ExecutionTimesLocalSize() -> None:
    """
  Iterate over multiple local sizes and collect the execution times
  for transferring data to the CPU and GPU and executing the kernel on each,
  and report a grouped bar plot.
  """
    global src1
    global src2

    MIN_LSIZE_POW = 0
    MAX_LSIZE_POW = 21
    GSIZE_POW = 21
    N_RUNS = 10**2

    for n, src in [("src1", src1), ("src2", src2)]:
        labels = {
            'CPU': {
                'data': [],
                'names': None
            },
            'GPU': {
                'data': [],
                'names': None
            },
        }
        groups = {
            'cpu_transfer': [[], []],
            'cpu_kernel': [[], []],
            'gpu_transfer': [[], []],
            'gpu_kernel': [[], []],
        }
        for lsize_pow in range(MIN_LSIZE_POW, MAX_LSIZE_POW + 1):
            print("##########", lsize_pow, 2**lsize_pow)

            cpt, cpk, gpt, gpk = opencl.CLDriveExecutionTimes(
                src, num_runs=N_RUNS, gsize=2**GSIZE_POW, lsize=2**lsize_pow)

            if cpt is None:
                while cpt is None:
                    cpt, cpk, gpt, gpk = opencl.CLDriveExecutionTimes(
                        src,
                        num_runs=N_RUNS,
                        gsize=2**GSIZE_POW,
                        lsize=2**lsize_pow)

            print(cpt.mean(), cpk.mean(), gpt.mean(), gpk.mean())

            if not math.isnan(cpt.mean()):
                groups['cpu_transfer'][0].append(lsize_pow)
                groups['cpu_transfer'][1].append(cpt.mean() / (10**6))

            if not math.isnan(cpk.mean()):
                groups['cpu_kernel'][0].append(lsize_pow)
                groups['cpu_kernel'][1].append(cpk.mean() / (10**6))

            if not math.isnan(gpt.mean()):
                groups['gpu_transfer'][0].append(lsize_pow)
                groups['gpu_transfer'][1].append(gpt.mean() / (10**6))

            if not math.isnan(gpk.mean()):
                groups['gpu_kernel'][0].append(lsize_pow)
                groups['gpu_kernel'][1].append(gpk.mean() / (10**6))

        plt.GrouppedBars(
            groups=groups,
            plot_name="exec_times_per_lsize",
            path=pathlib.Path("./plots/exec_times_lsize/{}".format(n)),
            x_name="power of 2",
            y_name="ms",
        )
    return
Example #11
def ExecutionTimesGlobalSize() -> None:
    """
  Iterate over multiple global sizes and collect the execution times
  for transferring data to the CPU and GPU and executing the kernel on each,
  and report a grouped bar plot.
  """
    global src1
    global src2

    MIN_GSIZE_POW = 8
    MAX_GSIZE_POW = 28
    N_RUNS = {
        'src1': {
            2**8: 10**5,
            2**9: 10**5,
            2**10: 10**5,
            2**11: 10**5,
            2**12: 10**5,
            2**13: 10**5,
            2**14: 10**4,
            2**15: 10**4,
            2**16: 10**4,
            2**17: 10**4,
            2**18: 10**4,
            2**19: 10**3,
            2**20: 10**3,
            2**21: 10**3,
            2**22: 10**3,
            2**23: 10**2,
            2**24: 10**2,
            2**25: 10**1,
            2**26: 10**1,
            2**27: 10**1,
            2**28: 10**1,
        },
        'src2': {
            2**8: 10**5,
            2**9: 10**5,
            2**10: 10**5,
            2**11: 10**5,
            2**12: 10**5,
            2**13: 10**4,
            2**14: 10**4,
            2**15: 10**4,
            2**16: 10**4,
            2**17: 10**3,
            2**18: 10**3,
            2**19: 10**3,
            2**20: 10**3,
            2**21: 10**2,
            2**22: 10**2,
            2**23: 10**2,
            2**24: 10**1,
            2**25: 10**1,
            2**26: 10**1,
            2**27: 10**1,
            2**28: 10**1,
        }
    }
    for n, src in [("src1", src1), ("src2", src2)]:
        labels = {
            'CPU': {
                'data': [],
                'names': None
            },
            'GPU': {
                'data': [],
                'names': None
            },
        }
        groups = {
            'cpu_transfer': [[], []],
            'cpu_kernel': [[], []],
            'gpu_transfer': [[], []],
            'gpu_kernel': [[], []],
        }
        for gsize_pow in range(MIN_GSIZE_POW, MAX_GSIZE_POW + 1):
            print("##########", gsize_pow, 2**gsize_pow)

            cpt, cpk, gpt, gpk = opencl.CLDriveExecutionTimes(
                src,
                num_runs=N_RUNS[n][2**gsize_pow],
                gsize=2**gsize_pow,
                lsize=256)

            if cpt is None:
                while cpt is None:
                    cpt, cpk, gpt, gpk = opencl.CLDriveExecutionTimes(
                        src,
                        num_runs=N_RUNS[n][2**gsize_pow],
                        gsize=2**gsize_pow,
                        lsize=256)

            print(cpt.mean(), cpk.mean(), gpt.mean(), gpk.mean())

            if not math.isnan(cpt.mean()):
                groups['cpu_transfer'][0].append(gsize_pow)
                groups['cpu_transfer'][1].append(cpt.mean() / (10**6))

            if not math.isnan(cpk.mean()):
                groups['cpu_kernel'][0].append(gsize_pow)
                groups['cpu_kernel'][1].append(cpk.mean() / (10**6))

            if not math.isnan(gpt.mean()):
                groups['gpu_transfer'][0].append(gsize_pow)
                groups['gpu_transfer'][1].append(gpt.mean() / (10**6))

            if not math.isnan(gpk.mean()):
                groups['gpu_kernel'][0].append(gsize_pow)
                groups['gpu_kernel'][1].append(gpk.mean() / (10**6))

        plt.GrouppedBars(
            groups=groups,
            plot_name="exec_times_per_gsize",
            path=pathlib.Path("./plots/exec_times_gsize/{}".format(n)),
            x_name="power of 2",
            y_name="ms",
        )
    return
Example #12
def SRCIRORVsBenchPress(**kwargs) -> None:
    """
  Compare the SRCIROR mutation tool, applied to GitHub's database, against BenchPress.
  The comparison is analogous to the KAverageScore comparison.
  """
    seed = kwargs.get('seed')
    benchpress = kwargs.get('benchpress')
    srciror_cache = kwargs.get('srciror_cache', '')
    mutation_level = kwargs.get('mutation_level')
    target = kwargs.get('targets')
    feature_space = kwargs.get('feature_space')
    top_k = kwargs.get('top_k')
    beam_width = kwargs.get('beam_width')
    unique_code = kwargs.get('unique_code', False)
    plot_config = kwargs.get('plot_config')
    workspace_path = kwargs.get('workspace_path')

    if mutation_level == 'src':
        if not pathlib.Path(SRCIROR_SRC).exists():
            raise FileNotFoundError(
                "SRCIROR_src executable not found: {}".format(SRCIROR_SRC))
    else:
        if not pathlib.Path(SRCIROR_IR).exists():
            raise FileNotFoundError(
                "SRCIROR_IR executable not found: {}".format(SRCIROR_IR))
    if seed.db_type != encoded.EncodedContentFiles and seed.db_type != clsmith.CLSmithDatabase:
        raise ValueError(
            "Scores require EncodedContentFiles or CLSmithDatabase but received",
            seed.db_type)
    if benchpress.db_type != samples_database.SamplesDatabase:
        raise ValueError(
            "BenchPress scores require SamplesDatabase but received",
            benchpress.db_type)
    if seed.db_type == clsmith.CLSmithDatabase:
        if not pathlib.Path(CLSMITH_INCLUDE).exists():
            raise FileNotFoundError(
                "CLSMITH_INCLUDE folder does not exist: {}".format(
                    CLSMITH_INCLUDE))

    ## Load database and checkpoint of targets.
    mutec_db = samples_database.SamplesDatabase(url="sqlite:///{}".format(
        pathlib.Path(srciror_cache).resolve()),
                                                must_exist=False)
    done = set()
    with mutec_db.Session(commit=True) as s:
        res = s.query(samples_database.SampleResults).filter_by(
            key=feature_space).first()
        if res is not None:
            done.update([str(x) for x in res.results.split('\n')])
        s.commit()

    ## Initialize dictionary.
    groups = {}
    groups["SRCIROR_{}".format(mutation_level)] = ([], [])
    groups[seed.group_name] = ([], [])
    groups[benchpress.group_name] = ([], [])

    ## Fix fetching data functions.
    if unique_code:
        git_get_data = lambda x: seed.get_unique_data_features(x)
        bp_get_data = lambda x: benchpress.get_unique_data_features(x)
    else:
        git_get_data = lambda x: seed.get_data_features(x)
        bp_get_data = lambda x: benchpress.get_data_features(x)

    ## Run engine on mutec.
    benchmarks = target.get_benchmarks(feature_space)
    for benchmark in tqdm.tqdm(benchmarks,
                               total=len(benchmarks),
                               desc="Benchmarks"):

        ## This has already been searched for.
        if benchmark.name in done:
            continue

        ## Tuples of (closest src, include, distance from target benchmark).
        closest = workers.SortedSrcDistances(git_get_data(feature_space),
                                             benchmark.features, feature_space)

        ## If CLSmith takes too long here, collect only features, then fetch the code
        ## only for the beam-width entries.

        # Split source and distances lists.
        git_dist = [x for _, _, x in closest]

        ## If distances are already minimized, nothing to do.
        if sum(git_dist[:top_k]) == 0:
            continue

        l.logger().info(benchmark.name)

        closest_mutec_src = beam_srciror(
            [(src, inc, dist)
             for src, inc, dist in closest[:beam_width] if dist > 0],
            benchmark.features, feature_space, beam_width,
            mutec_db)[:top_k]  # tuple of (src, distance)
        closest_mutec_dist = [x for _, _, x in closest_mutec_src]

        assert len(closest_mutec_dist) == len(git_dist[:top_k])
        ## If mutec has provided a better score
        if sum(closest_mutec_dist) < sum(git_dist[:top_k]):

            l.logger().info("Score reduced from {} to {}".format(
                sum(git_dist[:top_k]), sum(closest_mutec_dist)))
            l.logger().info("Best score from {} to {}".format(
                git_dist[0], closest_mutec_dist[0]))

            with mutec_db.Session(commit=True) as s:
                res = s.query(samples_database.SampleResults).filter_by(
                    key=feature_space).first()
                if res is not None:
                    res.results = res.results + "\n" + benchmark.name
                else:
                    s.add(
                        samples_database.SampleResults(key=feature_space,
                                                       results=benchmark.name))
                s.commit()

            # Compute target's distance from O(0,0)
            target_origin_dist = math.sqrt(
                sum([x**2 for x in benchmark.features.values()]))
            mutec_avg_dist = sum(closest_mutec_dist) / top_k

            groups["SRCIROR_{}".format(mutation_level)][0].append(
                benchmark.name)
            groups["SRCIROR_{}".format(mutation_level)][1].append(
                100 *
                ((target_origin_dist - mutec_avg_dist) / target_origin_dist))

            # Compute target's distance from O(0,0)
            git_avg_dist = sum(git_dist[:top_k]) / top_k
            groups[seed.group_name][0].append(benchmark.name)
            groups[seed.group_name][1].append(
                100 *
                ((target_origin_dist - git_avg_dist) / target_origin_dist))

    ## Run engine on benchpress.
    benchmarks = target.get_benchmarks(feature_space)
    for benchmark in tqdm.tqdm(benchmarks,
                               total=len(benchmarks),
                               desc="Benchpress"):
        ## Run only for benchmarks mutec has improved.
        if benchmark.name in groups["SRCIROR_{}".format(mutation_level)][0]:

            l.logger().info(benchmark.name)
            distances = workers.SortedDistances(bp_get_data(feature_space),
                                                benchmark.features,
                                                feature_space)

            # Compute target's distance from O(0,0)
            target_origin_dist = math.sqrt(
                sum([x**2 for x in benchmark.features.values()]))
            avg_dist = sum(distances[:top_k]) / len(distances[:top_k])

            groups[benchpress.group_name][0].append(benchmark.name)
            groups[benchpress.group_name][1].append(
                100 * ((target_origin_dist - avg_dist) / target_origin_dist))

    plotter.GrouppedBars(
        groups=groups,
        plot_name="srciror_src_avg_{}_{}_{}".format(
            top_k, seed.group_name,
            feature_space.replace("Features", " Features")),
        path=workspace_path,
        **plot_config if plot_config else {},
    )
    return
Example #13
def plot_speedups_with_clgen(benchmarks_data, clgen_data, suite="npb"):
    """
  Plot speedups of predictive models trained with and without clgen.

  Returns speedups (without and with).
  """
    # datasets: B - benchmarks, S - synthetics, BS - benchmarks + synthetics:
    B = pd.read_csv(benchmarks_data)
    B["group"] = ["B"] * len(B)

    S = pd.read_csv(clgen_data)
    S["group"] = ["S"] * len(S)

    BS = pd.concat((B, S))

    # find the ZeroR. This is the device which is most frequently optimal
    Bmask = B[B["benchmark"].str.contains(suite)]
    zeror = Counter(Bmask["oracle"]).most_common(1)[0][0]
    zeror_runtime = "runtime_" + zeror.lower()

    # get the names of the benchmarks, in the form: $suite-$version-$benchmark
    benchmark_names = sorted(
        set([
            re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)-", b).group(1)
            for b in B["benchmark"] if b.startswith(suite)
        ]))

    B_out, S_out, BS_out = [], [], []
    for benchmark in benchmark_names:
        clf = model.model()
        features = get_cgo13_features
        # cross validate on baseline
        B_out += model.leave_one_benchmark_out(clf, features, B, benchmark)
        # reset model
        clf = model.model()
        S_out += model.leave_one_benchmark_out(clf,
                                               features,
                                               BS,
                                               benchmark,
                                               synthetics=True,
                                               is_clgen=True)
        clf = model.model()
        # repeat cross-validation with synthetic kernels
        BS_out += model.leave_one_benchmark_out(clf,
                                                features,
                                                BS,
                                                benchmark,
                                                synthetics=False,
                                                is_clgen=True)

    # create results frame
    R_out = []
    for b, s, bs in zip(B_out, S_out, BS_out):
        # get the runtime when using the predicted device
        b_p_runtime = b["runtime_" + b["p"].lower()]
        s_p_runtime = s["runtime_" + s["p"].lower()]
        bs_p_runtime = bs["runtime_" + bs["p"].lower()]

        # speedup is the ratio of runtime using the predicted device
        # over runtime using ZeroR device
        b["p_speedup"] = b_p_runtime / b[zeror_runtime]
        s["p_speedup"] = s_p_runtime / s[zeror_runtime]
        bs["p_speedup"] = bs_p_runtime / bs[zeror_runtime]

        print(b_p_runtime, s_p_runtime, bs_p_runtime, b[zeror_runtime],
              s[zeror_runtime], bs[zeror_runtime])

        if "training" in benchmarks_data:
            # $benchmark
            group = escape_benchmark_name(b["benchmark"])
        else:
            # $benchmark.$dataset
            group = re.sub(r"[^-]+-[0-9\.]+-([^-]+)-.+", r"\1",
                           b["benchmark"]) + "." + b["dataset"]
        b["group"] = group
        s["group"] = group
        bs["group"] = group

        # set the training data type
        b["training"] = "Grewe et al."
        s["training"] = "Only CLgen"
        bs["training"] = "w. CLgen"

        R_out.append(b)
        R_out.append(s)
        R_out.append(bs)

    R = pd.DataFrame(R_out)

    b_mask = R["training"] == "Grewe et al."
    s_mask = R["training"] == "Only CLgen"
    bs_mask = R["training"] == "w. CLgen"

    B_speedup = mean(R[b_mask].groupby(["group"])["p_speedup"].mean())
    S_speedup = mean(R[s_mask].groupby(["group"])["p_speedup"].mean())
    BS_speedup = mean(R[bs_mask].groupby(["group"])["p_speedup"].mean())

    groups = {
        "Benchmarks": {},
        "Bench+Synth": {},
        "Synthetics": {},
    }

    bench_times = 0.0
    benchsynth_times = 0.0
    synth_times = 0.0

    R.to_csv("./investigate.csv")
    # for x in R[b_mask]:
    #   print(x)
    #   bench_times += x["runtime_" + x["p"].lower()]

    # for x in R[bs_mask]:
    #   benchsynth_times += x["runtime_" + x["p"].lower()]

    # for x in R[s_mask]:
    #   synth_times += x["runtime_" + x["p"].lower()]

    # print(bench_times)
    # print(benchsynth_times)
    # print(synth_times)

    print(len(R[b_mask]["p_speedup"]))
    print(len(R[s_mask]["p_speedup"]))
    print(len(R[bs_mask]["p_speedup"]))

    for x in R[b_mask]["p_speedup"]:
        x = int(x)
        if x not in groups["Benchmarks"]:
            groups["Benchmarks"][x] = 1
        else:
            groups["Benchmarks"][x] += 1

    for x in R[bs_mask]["p_speedup"]:
        x = int(x)
        if x not in groups["Bench+Synth"]:
            groups["Bench+Synth"][x] = 1
        else:
            groups["Bench+Synth"][x] += 1

    for x in R[s_mask]["p_speedup"]:
        x = int(x)
        if x not in groups["Synthetics"]:
            groups["Synthetics"][x] = 1
        else:
            groups["Synthetics"][x] += 1

    for k, v in groups.items():
        groups[k] = (list(v.keys()), list(v.values()))

    plotter.GrouppedBars(
        groups=groups,  # Dict[str, Tuple[List, List]]
        plot_name="speedup_distribution",
        path=pathlib.Path("."),
        title="Speedup distribution frequency",
        x_name="Speedup absolute value",
    )

    b_distr = distributions.GenericDistribution(
        [int(x) for x in R[b_mask]["p_speedup"]], "plots", "benchmarks")
    s_distr = distributions.GenericDistribution(
        [int(x) for x in R[s_mask]["p_speedup"]], "plots", "synthetics")
    bs_distr = distributions.GenericDistribution(
        [int(x) for x in R[bs_mask]["p_speedup"]], "plots",
        "synthetics_benchmarks")

    b_distr.plot()
    s_distr.plot()
    bs_distr.plot()

    print(s_distr - b_distr > 0)
    print(bs_distr - b_distr > 0)

    (s_distr - b_distr).plot()
    (bs_distr - b_distr).plot()

    print("  #. benchmarks:                  ", len(set(B["benchmark"])),
          "kernels,", len(B), "observations")
    print("  #. synthetic:                   ", len(set(S["benchmark"])),
          "kernels,", len(S), "observations")
    print()
    print("  ZeroR device:                    {}".format(zeror))
    print()
    print("  Speedup of Grewe et al.:         {:.2f} x".format(B_speedup))
    print("  Speedup w. CLgen:                {:.2f} x".format(BS_speedup))
    print("  Speedup Only CLgen:              {:.2f} x".format(S_speedup))
    # print("  Speedup of Grewe et al.:         {:.2f} x".format(model.geomean([x for x in R[b_mask]["p_speedup"]])))
    # print("  Speedup w. CLgen:                {:.2f} x".format(model.geomean([x for x in R[bs_mask]["p_speedup"]])))
    # print("  Speedup Only CLgen:              {:.2f} x".format(model.geomean([x for x in R[s_mask]["p_speedup"]])))

    bft = [x.p_speedup for idx, x in R[b_mask].iterrows() if x.group == "FT.B"]
    sft = [x.p_speedup for idx, x in R[s_mask].iterrows() if x.group == "FT.B"]
    bsft = [
        x.p_speedup for idx, x in R[bs_mask].iterrows() if x.group == "FT.B"
    ]

    print()
    print()
    print()

    print("FT.B Grewe: {}".format(sum(bft) / len(bft)))
    print("FT.B w Clgen: {}".format(sum(bsft) / len(bsft)))
    print("FT.B Only Clgen: {}".format(sum(sft) / len(sft)))

    R = R.append(
        {  # average bars
            "group": "Average",
            "p_speedup": B_speedup,
            "training": "Grewe et al."
        },
        ignore_index=True)
    R = R.append(
        {
            "group": "Average",
            "p_speedup": BS_speedup,
            "training": "w. CLgen"
        },
        ignore_index=True)

    R["p_speedup"] -= 1  # negative offset so that bars start at 1

    # colors
    palette = sns.cubehelix_palette(len(set(R["training"])),
                                    rot=-.4,
                                    light=.85,
                                    dark=.35)

    ax = sns.barplot(x="group",
                     y="p_speedup",
                     data=R,
                     ci=None,
                     hue="training",
                     palette=palette)
    plt.ylabel("Speedup")
    plt.xlabel("")

    plt.axhline(y=0, color="k", lw=1)  # speedup line
    plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1,
                linestyle="--")  # average line

    ax.get_legend().set_title("")  # no legend title
    plt.legend(loc='upper right')
    ax.get_legend().draw_frame(True)

    # plot shape and size
    figsize = (3 * 9, 3 * 2.2)
    if "nvidia" in benchmarks_data:
        typecast = int
        plt.ylim(-1, 16)
    elif "training" in benchmarks_data:
        typecast = float
        figsize = (3 * 7, 3 * 3.2)
    else:
        typecast = float

    # counter negative offset:
    ax.set_yticklabels([typecast(i) + 1 for i in ax.get_yticks()])

    plt.setp(ax.get_xticklabels(), rotation=90)

    Finalize(output="plot.png", figsize=figsize, tight=True)
    return B_speedup, BS_speedup