Example #1
def download_llvm_files() -> Path:
    """Download and unpack the LLVM data pack."""
    global _LLVM_UNPACKED_LOCATION

    unpacked_location = site_data_path("llvm-v0")
    # Fast path for repeated calls.
    if _LLVM_UNPACKED_LOCATION == unpacked_location:
        return unpacked_location

    with _LLVM_DOWNLOAD_LOCK:
        # Fast path for first call. This check will be repeated inside the locked
        # region if required.
        if (unpacked_location / ".unpacked").is_file():
            _LLVM_UNPACKED_LOCATION = unpacked_location
            return unpacked_location

        with InterProcessLock(cache_path(".llvm-v0-install.LOCK")):
            # Now that the lock is acquired, repeat the check to see if it is
            # necessary to download the dataset.
            if (unpacked_location / ".unpacked").is_file():
                return unpacked_location

            _download_llvm_files(unpacked_location)
            # Create the marker file to indicate that the directory is unpacked
            # and ready to go.
            (unpacked_location / ".unpacked").touch()
            _LLVM_UNPACKED_LOCATION = unpacked_location

        return unpacked_location
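
The function above layers three guards so that concurrent callers trigger at most one download: a module-level fast-path variable, a thread lock, and a fasteners inter-process lock, with a ".unpacked" marker file re-checked after the locks are acquired. A minimal, self-contained sketch of the same double-checked idiom follows; the helper names and the `fetch` callback are illustrative, not CompilerGym's API.

import threading
from pathlib import Path

from fasteners import InterProcessLock

_LOCK = threading.Lock()

def ensure_unpacked(unpacked_dir: Path, lock_file: Path, fetch) -> Path:
    """Populate unpacked_dir at most once, safely across threads and processes.

    `fetch` is a caller-supplied callable that downloads and extracts the data
    into `unpacked_dir`, creating the directory if needed.
    """
    marker = unpacked_dir / ".unpacked"
    if marker.is_file():  # Fast path: a previous call completed the unpack.
        return unpacked_dir
    with _LOCK:  # Serialize threads within this process.
        with InterProcessLock(str(lock_file)):  # Serialize other processes.
            if not marker.is_file():  # Re-check now that the locks are held.
                fetch(unpacked_dir)
                marker.touch()  # Written last: presence implies completeness.
    return unpacked_dir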
Example #2
def _get_gcc_datasets(
        gcc_bin: Union[str, Path],
        site_data_base: Optional[Path] = None) -> Iterable[Dataset]:
    site_data_base = site_data_base or site_data_path("gcc-v0")

    yield CHStoneDataset(gcc_bin=gcc_bin, site_data_base=site_data_base)
    yield AnghaBenchDataset(site_data_base=site_data_base)
    yield CsmithDataset(gcc_bin=gcc_bin, site_data_base=site_data_base)
Example #3
def get_mlir_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset]:
    """Instantiate the builtin datasets.
    :param site_data_base: The root of the site data path.
    :return: An iterable sequence of :class:`Dataset
        <compiler_gym.datasets.Dataset>` instances.
    """
    site_data_base = site_data_base or site_data_path("mlir-v0")

    yield MatmulDataset(site_data_base=site_data_base)
Example #4
def get_storage_paths() -> List[Path]:
    """Return the list of paths used by CompilerGym for filesystem storage.

    :return: A list of filesystem paths that CompilerGym uses to store files.
    """
    return sorted({
        runfiles_path.cache_path("."),
        runfiles_path.transient_cache_path("."),
        runfiles_path.site_data_path("."),
    })
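
These three helpers (`cache_path`, `transient_cache_path`, and `site_data_path`) are the same ones imported elsewhere in these examples from `compiler_gym.util.runfiles_path`. A hedged usage sketch that reports how much disk space each storage root currently uses (the reporting logic is illustrative, not part of CompilerGym):

from compiler_gym.util.runfiles_path import (
    cache_path,
    site_data_path,
    transient_cache_path,
)

for root in sorted({cache_path("."), transient_cache_path("."), site_data_path(".")}):
    # Sum the sizes of regular files under the root, if the root exists yet.
    size = 0
    if root.is_dir():
        size = sum(f.stat().st_size for f in root.rglob("*") if f.is_file())
    print(f"{root}: {size} bytes")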
Example #5
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.actions: List[int] = []
        self.datasets_site_path = site_data_path(
            "llvm/10.0.0/bitcode_benchmarks")

        # Register the LLVM datasets.
        self.datasets_site_path.mkdir(parents=True, exist_ok=True)
        self.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True)
        for dataset in LLVM_DATASETS:
            self.register_dataset(dataset)
Example #6
    def __init__(self, bin: Union[str, Path]):
        self.bin = str(bin)
        self.image = self.bin[len("docker:"):]

        if self.bin.startswith("docker:"):
            pull_docker_image(self.image)
            self.call = self._docker_run
        else:
            self.call = self._subprocess_run

        self.spec = _get_spec(self, cache_dir=site_data_path("gcc-v0"))
Example #7
def setup(cwd: Path):
    cbench_data = site_data_path(
        "llvm-v0/cbench-v1-runtime-data/runtime_data")
    # Copy the input data file into the current directory since ghostscript
    # doesn't like long input paths.
    shutil.copyfile(cbench_data / "office_data" / f"{dataset_id}.ps",
                    cwd / "input.ps")
    # Ghostscript doesn't like the library files being symlinks, so copy them
    # into the working directory as regular files.
    for path in (cbench_data / "ghostscript").iterdir():
        if path.name.endswith(".ps"):
            shutil.copyfile(path, cwd / path.name)
Example #8
    def __init__(self, *args, **kwargs):
        super().__init__(
            name="benchmark://unrolling-v0",
            license="MIT",
            description="Unrolling example dataset",
            site_data_base=site_data_path(
                "example_dataset"
            ),  # TODO: what should we set this to? we are not using it
        )

        self._benchmarks = {
            "/offsets1": Benchmark.from_file_contents(
                "benchmark://unrolling-v0/offsets1",
                self.preprocess(BENCHMARKS_PATH / "offsets1.c"),
            ),
            "/conv2d": Benchmark.from_file_contents(
                "benchmark://unrolling-v0/conv2d",
                self.preprocess(BENCHMARKS_PATH / "conv2d.c"),
            ),
        }
Example #9
def download_cBench_runtime_data() -> bool:
    """Download and unpack the cBench runtime dataset."""
    cbench_data = site_data_path("llvm-v0/cbench-v1-runtime-data/runtime_data")
    if (cbench_data / "unpacked").is_file():
        return False
    else:
        # Clean up any partially-extracted data directory.
        if cbench_data.is_dir():
            shutil.rmtree(cbench_data)

        url, sha256 = _CBENCH_RUNTIME_DATA
        tar_contents = io.BytesIO(download(url, sha256))
        with tarfile.open(fileobj=tar_contents, mode="r:bz2") as tar:
            cbench_data.parent.mkdir(parents=True, exist_ok=True)
            tar.extractall(cbench_data.parent)
        assert cbench_data.is_dir()
        # Create the marker file to indicate that the directory is unpacked
        # and ready to go.
        (cbench_data / "unpacked").touch()
        return True
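
The "unpacked" marker file is touched only after a successful extraction, which is what makes the early `return False` safe against interrupted downloads. A standard-library-only sketch of the same download-verify-unpack flow, assuming the archive's top-level directory becomes `dest` when extracted into `dest.parent` (the helper and its arguments are illustrative, not CompilerGym's `download()`):

import hashlib
import io
import shutil
import tarfile
import urllib.request
from pathlib import Path

def fetch_and_unpack(url: str, sha256: str, dest: Path) -> bool:
    """Return True if the archive was downloaded and unpacked, False if cached."""
    marker = dest / "unpacked"
    if marker.is_file():
        return False  # A previous run completed successfully.
    if dest.is_dir():
        shutil.rmtree(dest)  # Clean up any partially-extracted data.
    data = urllib.request.urlopen(url).read()
    if hashlib.sha256(data).hexdigest() != sha256:
        raise ValueError(f"Checksum mismatch for {url}")
    dest.parent.mkdir(parents=True, exist_ok=True)
    with tarfile.open(fileobj=io.BytesIO(data), mode="r:bz2") as tar:
        tar.extractall(dest.parent)
    assert dest.is_dir(), "Archive did not produce the expected directory"
    marker.touch()  # Written last, so its presence implies a complete unpack.
    return True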
Example #10
def test_download_llvm_threaded_load_test(temporary_environ, tmpwd: Path,
                                          mocker):
    """A load test for download_llvm_files() that checks that redundant
    downloads are not performed when multiple simultaneous calls to
    download_llvm_files() are issued.
    """
    mocker.spy(llvm, "_download_llvm_files")
    mocker.spy(llvm, "download")

    # Force the LLVM download function to run.
    llvm._LLVM_DOWNLOADED = False

    # Force a temporary new site data path and sanity check it.
    temporary_environ["COMPILER_GYM_SITE_DATA"] = str(tmpwd)
    assert str(site_data_path(".")).endswith(str(tmpwd))

    # Perform a bunch of concurrent calls to download_llvm_files().
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(llvm.download_llvm_files) for _ in range(100)
        ]
        for future in futures:
            future.result()

    # For debugging in case of error.
    print("Downloads:",
          llvm._download_llvm_files.call_count)  # pylint: disable
    for root, _, filenames in os.walk(tmpwd):
        print(root)
        for filename in filenames:
            print(Path(root) / filename)

    # Check that the files were unpacked.
    assert (tmpwd / "llvm-v0" / "LICENSE").is_file()
    assert (tmpwd / "llvm-v0" / "bin" / "clang").is_file()

    # Check that the underlying download implementation was only called a single
    # time.
    assert llvm._download_llvm_files.call_count == 1  # pylint: disable
    assert llvm.download.call_count == 1
Example #11
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.actions: List[int] = []
        self.datasets_site_path = site_data_path(
            "llvm/10.0.0/bitcode_benchmarks")

        # Register the LLVM datasets.
        self.datasets_site_path.mkdir(parents=True, exist_ok=True)
        self.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True)
        for dataset in LLVM_DATASETS:
            self.register_dataset(dataset)

        self.inst2vec = _INST2VEC_ENCODER

        self.observation.spaces["CpuInfo"].space = DictSpace({
            "name":
            Sequence(size_range=(0, None), dtype=str),
            "cores_count":
            Scalar(min=None, max=None, dtype=int),
            "l1i_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l1i_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l1d_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l1d_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l2_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l2_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l3_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l3_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l4_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l4_cache_count":
            Scalar(min=None, max=None, dtype=int),
        })

        self.observation.add_derived_space(
            id="Inst2vecPreprocessedText",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=str),
            cb=lambda base_observation: self.inst2vec.preprocess(
                base_observation),
            default_value="",
        )
        self.observation.add_derived_space(
            id="Inst2vecEmbeddingIndices",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=np.int32),
            cb=lambda base_observation: self.inst2vec.encode(
                self.inst2vec.preprocess(base_observation)),
            default_value=np.array([self.inst2vec.vocab["!UNK"]]),
        )
        self.observation.add_derived_space(
            id="Inst2vec",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=np.ndarray),
            cb=lambda base_observation: self.inst2vec.embed(
                self.inst2vec.encode(
                    self.inst2vec.preprocess(base_observation))),
            default_value=np.vstack(
                [self.inst2vec.embeddings[self.inst2vec.vocab["!UNK"]]]),
        )

        self.observation.add_derived_space(
            id="AutophaseDict",
            base_id="Autophase",
            space=DictSpace({
                name: Scalar(min=0, max=None, dtype=int)
                for name in AUTOPHASE_FEATURE_NAMES
            }),
            cb=lambda base_observation: {
                name: val
                for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
            },
        )
Example #12
    def __init__(
        self,
        local_service_binary: Path,
        port_init_max_seconds: float,
        rpc_init_max_seconds: float,
        process_exit_max_seconds: float,
        script_args: List[str],
        script_env: Dict[str, str],
    ):
        """Constructor.

        :param local_service_binary: The path of the service binary.
        :raises TimeoutError: If the service fails to establish a connection within the specified time limit.
        """
        self.process_exit_max_seconds = process_exit_max_seconds

        if not Path(local_service_binary).is_file():
            raise FileNotFoundError(f"File not found: {local_service_binary}")
        self.cache = ServiceCache()

        # The command that will be executed. The working directory of this
        # command will be set to the local_service_binary's parent, so we can
        # use the relpath for a neater `ps aux` view.
        cmd = [
            f"./{local_service_binary.name}",
            f"--working_dir={self.cache.path}",
        ]
        # Add any custom arguments
        cmd += script_args

        # Set the root of the runfiles directory.
        env = os.environ.copy()
        env["COMPILER_GYM_RUNFILES"] = str(runfiles_path("."))
        env["COMPILER_GYM_SITE_DATA"] = str(site_data_path("."))
        # Set the pythonpath so that executable python scripts can use absolute
        # import paths like `from compiler_gym.envs.foo import bar`.
        if "PYTHONPATH" in env:
            env["PYTHONPATH"] = f'{env["PYTHONPATH"]}:{env["COMPILER_GYM_RUNFILES"]}'
        else:
            env["PYTHONPATH"] = env["COMPILER_GYM_RUNFILES"]

        # Set the verbosity of the service. The logging level of the service is
        # the debug level - 1, so that COMPILER_GYM_DEBUG=3 will cause VLOG(2)
        # and lower to be logged to stdout.
        debug_level = max(
            get_debug_level(),
            logging_level_to_debug_level(logger.getEffectiveLevel()))
        if debug_level > 0:
            cmd.append("--alsologtostderr")
            cmd.append(f"-v={debug_level - 1}")
            # If we are debugging the backend, set the logbuflevel to a low
            # value to disable buffering of logging messages. This removes any
            # buffering between `LOG(INFO) << "..."` and the message being
            # emitted to stderr.
            cmd.append("--logbuflevel=-1")
        else:
            # Silence the gRPC logs as we will do our own error reporting, but
            # don't override any existing value so that the user may debug the
            # gRPC backend by setting GRPC_VERBOSITY to ERROR, INFO, or DEBUG.
            if not os.environ.get("GRPC_VERBOSITY"):
                env["GRPC_VERBOSITY"] = "NONE"

        # Set environment variable COMPILER_GYM_SERVICE_ARGS to pass
        # additional arguments to the service.
        args = os.environ.get("COMPILER_GYM_SERVICE_ARGS", "")
        if args:
            cmd.append(args)

        # Add any custom environment variables
        env.update(script_env)

        logger.debug(
            "Exec `%s%s`",
            " ".join(f"{k}={v}" for k, v in script_env.items()) +
            " " if script_env else "",
            join_cmd(cmd),
        )

        self.process = subprocess.Popen(
            cmd,
            env=env,
            cwd=local_service_binary.parent,
        )
        self._process_returncode_exception_raised = False

        # Read the port from a file generated by the service.
        wait_secs = 0.1
        port_path = self.cache / "port.txt"
        end_time = time() + port_init_max_seconds
        while time() < end_time:
            returncode = self.process.poll()
            if returncode is not None:
                try:
                    # Try and decode the name of a signal. Signal returncodes
                    # are negative.
                    returncode = f"{returncode} ({Signals(abs(returncode)).name})"
                except ValueError:
                    pass
                msg = f"Service terminated with returncode: {returncode}"
                # Attach any logs from the service if available.
                logs = truncate_lines(self.loglines(),
                                      max_line_len=100,
                                      max_lines=25,
                                      tail=True)
                if logs:
                    msg = f"{msg}\nService logs:\n{logs}"
                self.cache.close()
                raise ServiceError(msg)
            if port_path.is_file():
                try:
                    with open(port_path) as f:
                        self.port = int(f.read().rstrip())
                    break
                except ValueError:
                    # ValueError is raised by int(...) on invalid input. In that
                    # case, wait for longer.
                    pass
            sleep(wait_secs)
            wait_secs *= 1.2
        else:
            # kill() was added in Python 3.7.
            if sys.version_info >= (3, 7, 0):
                self.process.kill()
            else:
                self.process.terminate()
            self.process.communicate(timeout=rpc_init_max_seconds)
            self.cache.close()
            raise TimeoutError("Service failed to produce port file after "
                               f"{port_init_max_seconds:.1f} seconds")

        url = f"localhost:{self.port}"

        wait_secs = 0.1
        attempts = 0
        end_time = time() + rpc_init_max_seconds
        while time() < end_time:
            try:
                channel = grpc.insecure_channel(
                    url,
                    options=GRPC_CHANNEL_OPTIONS,
                )
                channel_ready = grpc.channel_ready_future(channel)
                attempts += 1
                channel_ready.result(timeout=wait_secs)
                break
            except (grpc.FutureTimeoutError, grpc.RpcError) as e:
                logger.debug("Connection attempt %d = %s %s", attempts,
                             type(e).__name__, str(e))
                wait_secs *= 1.2
        else:
            # kill() was added in Python 3.7.
            if sys.version_info >= (3, 7, 0):
                self.process.kill()
            else:
                self.process.terminate()
            self.process.communicate(timeout=process_exit_max_seconds)

            # Include the last few lines of logs generated by the compiler
            # service, if any.
            logs = truncate_lines(self.loglines(),
                                  max_line_len=100,
                                  max_lines=25,
                                  tail=True)
            logs_message = f" Service logs:\n{logs}" if logs else ""

            self.cache.close()
            raise TimeoutError(
                "Failed to connect to RPC service after "
                f"{rpc_init_max_seconds:.1f} seconds.{logs_message}")

        super().__init__(channel, url)
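
Two patterns in this constructor are worth isolating: the service writes its port to a file once it is listening, and the client polls for that file with an exponentially growing sleep before attempting the gRPC connection. A stripped-down sketch of just the port-file polling loop (names and timings are illustrative):

import time
from pathlib import Path

def wait_for_port_file(port_path: Path, max_seconds: float) -> int:
    """Poll for a text file containing a port number, with exponential backoff."""
    wait_secs = 0.1
    deadline = time.time() + max_seconds
    while time.time() < deadline:
        if port_path.is_file():
            try:
                return int(port_path.read_text().rstrip())
            except ValueError:
                pass  # The file exists but is still being written; keep waiting.
        time.sleep(wait_secs)
        wait_secs *= 1.2
    raise TimeoutError(f"No port file after {max_seconds:.1f} seconds")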
Example #13
    def benchmark_from_size(self,
                            mnk,
                            max_retries: int = 3,
                            retry_count: int = 0) -> MatmulBenchmark:
        """Get a benchmark from a uint32 seed.
        :param mnk: 3-tuple containing m, n, k sizes of the matmul
        :return: A benchmark instance.
        :raises OSError: If matmul fails.
        :raises BenchmarkInitError: If the C program generated by matmul cannot
            be lowered to mlir-IR.
        """
        if retry_count >= max_retries:
            raise OSError(
                f"matmul failed after {retry_count} {plural(retry_count, 'attempt', 'attempts')} "
                f"with size {mnk}")

        self.install()
        mnk = list(mnk)
        # Run matmul with the given size and regex to produce the correct mlir
        logger.debug("Exec matmul --mnk %d", mnk)

        # TODO(kyleherndon): refactor these to another location
        src_content = """
func @matmul(%a: tensor<${M}x${K}xf32> {linalg.buffer_layout = affine_map<(i, j)[s0, s1] -> (i, j)>},
             %b: tensor<${K}x${N}xf32> {linalg.buffer_layout = affine_map<(i, j)[s0, s1] -> (i, j)>},
             %c: tensor<${M}x${N}xf32> {linalg.buffer_layout = affine_map<(i, j)[s0, s1] -> (i, j)>}) -> tensor<${M}x${N}xf32>
attributes { passthrough = [["target-cpu", "haswell"], ["prefer-vector-width", "256"]]}
{
  %f0 = arith.constant 0.0 : f32
  %f1 = linalg.fill(%f0, %c) : f32, tensor<${M}x${N}xf32> -> tensor<${M}x${N}xf32>
  %d = linalg.matmul ins(%a, %b : tensor<${M}x${K}xf32>, tensor<${K}x${N}xf32>)
    outs(%f1: tensor<${M}x${N}xf32>) -> tensor<${M}x${N}xf32>
  return %d : tensor<${M}x${N}xf32>
}"""
        cc_src = """
#include <benchmark/benchmark.h>
#include <mlir/ExecutionEngine/RunnerUtils.h>

#include <cstdio>
#include <vector>

void naive_matmul(const float* a, const float* b, float* c, size_t m, size_t k, size_t n) {
  // correctness check
  for (size_t i = 0; i < m; i++) {
    for (size_t j = 0; j < n; j++) {
#ifdef COLUMN_MAJOR
      size_t ci = i + j * m;
#else
      size_t ci = i * n + j;
#endif
      c[ci] = 0.0f;
      for (size_t p = 0; p < k; p++) {
#ifdef COLUMN_MAJOR
        c[ci] += a[i + p * m] * b[p + j * k];
#else
        c[ci] += a[i * k + p] * b[p * n + j];
#endif
      }
    }
  }
}

void init_matrix(float* a, int nrows, int ncols) {
  for (int j = 0; j < ncols; j++) {
    for (int i = 0; i < nrows; i++) {
      a[i + j * nrows] = ((float)rand() / (float)RAND_MAX);
    }
  }
}

extern "C" {
void matmul(float* aligned_a, float* allocated_a, int64_t offset_a, int64_t size_a0,
            int64_t size_a1, int64_t strides_a0, int64_t strides_a1, float* aligned_b,
            float* allocated_b, int64_t offset_b, int64_t size_b0, int64_t size_b1,
            int64_t strides_b0, int64_t strides_b1, float* aligned_c, float* allocated_c,
            int64_t offset_c, int64_t size_c0, int64_t size_c1, int64_t strides_c0,
            int64_t strides_c1);
}

size_t g_errors = 0;
static void BenchmarkFunction(benchmark::State& state) {
  // TODO(boian): pass these as command line arguments
  int MDIM = ${M};
  int NDIM = ${N};
  int KDIM = ${K};
  std::vector<float> a(MDIM * KDIM);
  std::vector<float> b(KDIM * NDIM);
  std::vector<float> c(MDIM * NDIM);
  float *A = a.data(), *B = b.data(), *C = c.data();
  //  a[0] = 1; b[0] = 2;
  init_matrix(A, MDIM, KDIM);
  init_matrix(B, KDIM, NDIM);
  init_matrix(C, MDIM, NDIM);
  int LDA = KDIM;
  int LDB = NDIM;
  int LDC = NDIM;

  for (auto _ : state) {
    matmul(A, A, 0, MDIM, KDIM, LDA, 1, B, B, 0, KDIM, NDIM, LDB, 1, C, C, 0, MDIM, NDIM, LDC, 1);
  }

  std::vector<float> c2(MDIM * NDIM);
  float* C2 = c2.data();
  size_t errors = 0;
  naive_matmul(A, B, C2, MDIM, KDIM, NDIM);
  for (size_t i = 0; i < MDIM; i++) {
    for (size_t j = 0; j < NDIM; j++) {
      size_t ci = i + j * MDIM;
      if (std::abs(C[ci] - C2[ci]) > 0.01f) {
        if (errors == 0) {
          fprintf(stderr, "Incorrect result at index %ld,%ld: C=%0.2f C2=%0.2f\\n", i, j, C[ci],
                  C2[ci]);
        }
        errors++;
      }
    }
  }
  fprintf(stderr, "Detected %ld errors.\\n", errors);
  g_errors = errors;
}

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::RegisterBenchmark("BM_Matmul", BenchmarkFunction)
      ->MeasureProcessCPUTime()
      ->UseRealTime();
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return g_errors != 0;
}
"""
        mlir_site_dir = site_data_path("mlir-v0")
        mlir_site_dir.mkdir(parents=True, exist_ok=True)
        mlir_file_path = site_data_path("mlir-v0") / "matmul.mlir.template"
        with open(mlir_file_path, "w+") as mlir_file:
            mlir_file.write(src_content)
            mlir_file.close()
        cc_file_path = site_data_path("mlir-v0") / "benchmark_main.cc.template"
        with open(cc_file_path, "w+") as cc_file:
            cc_file.write(cc_src)
            cc_file.close()
        new_content = src_content.replace("${M}", str(mnk[0]))
        new_content = new_content.replace("${N}", str(mnk[1]))
        content = new_content.replace("${K}", str(mnk[2]))

        return self.benchmark_class.create(
            self.name_from_size(mnk),
            bytes(content, "utf-8"),
            bytes(src_content, "utf-8"),
        )
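
The manual `${M}`/`${N}`/`${K}` replacement at the end of the method mirrors what `string.Template` provides out of the box; a small standalone sketch of that alternative (the abbreviated template and sizes here are illustrative):

from string import Template

src_content = "tensor<${M}x${K}xf32> ... tensor<${K}x${N}xf32>"  # abbreviated template
mnk = [64, 64, 64]
# safe_substitute leaves any unrelated "$" tokens in the source untouched.
content = Template(src_content).safe_substitute(M=mnk[0], N=mnk[1], K=mnk[2])
print(content)  # tensor<64x64xf32> ... tensor<64x64xf32>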
Example #14
    def validator_cb(
            env: "LlvmEnv") -> Optional[ValidationError]:  # noqa: F821
        """The validation callback."""
        with _CBENCH_DOWNLOAD_THREAD_LOCK:
            with fasteners.InterProcessLock(
                    cache_path(".cbench-v1-runtime-data.LOCK")):
                download_cBench_runtime_data()

        cbench_data = site_data_path(
            "llvm-v0/cbench-v1-runtime-data/runtime_data")
        for input_file_name in input_files:
            path = cbench_data / input_file_name
            if not path.is_file():
                raise FileNotFoundError(
                    f"Required benchmark input not found: {path}")

        # Create a temporary working directory to execute the benchmark in.
        with tempfile.TemporaryDirectory(
                dir=env.service.connection.working_dir) as d:
            cwd = Path(d)

            # Expand shell variable substitutions in the benchmark command.
            expanded_command = cmd.replace("$D", str(cbench_data))

            # Translate the output file names into paths inside the working
            # directory.
            output_paths = [cwd / o for o in output_files]

            if pre_execution_callback:
                pre_execution_callback(cwd)

            # Produce a gold-standard output using a reference version of
            # the benchmark.
            if compare_output or output_files:
                gs_env = env.fork()
                try:
                    # Reset to the original benchmark state and compile it.
                    gs_env.reset(benchmark=env.benchmark)
                    gs_env.write_bitcode(cwd / "benchmark.bc")
                    gold_standard = _compile_and_run_bitcode_file(
                        bitcode_file=cwd / "benchmark.bc",
                        cmd=expanded_command,
                        cwd=cwd,
                        num_runs=1,
                        # Use default optimizations for gold standard.
                        linkopts=linkopts + ["-O2"],
                        # Always assume safe.
                        sanitizer=None,
                        env=os_env,
                    )
                    if gold_standard.error:
                        return ValidationError(
                            type=f"Gold standard: {gold_standard.error.type}",
                            data=gold_standard.error.data,
                        )
                finally:
                    gs_env.close()

                # Check that the reference run produced the expected output
                # files.
                for path in output_paths:
                    if not path.is_file():
                        try:
                            output = gold_standard.output
                        except UnicodeDecodeError:
                            output = "<binary>"
                        raise FileNotFoundError(
                            f"Expected file '{path.name}' not generated\n"
                            f"Benchmark: {env.benchmark}\n"
                            f"Command: {cmd}\n"
                            f"Output: {output}")
                    path.rename(f"{path}.gold_standard")

            # Serialize the benchmark to a bitcode file that will then be
            # compiled to a binary.
            env.write_bitcode(cwd / "benchmark.bc")
            outcome = _compile_and_run_bitcode_file(
                bitcode_file=cwd / "benchmark.bc",
                cmd=expanded_command,
                cwd=cwd,
                num_runs=num_runs,
                linkopts=linkopts,
                sanitizer=sanitizer,
                env=os_env,
            )

            if outcome.error:
                return outcome.error

            # Run a user-specified validation hook.
            if validate_result:
                validate_result(outcome)

            # Difftest the console output.
            if compare_output and gold_standard.output != outcome.output:
                return ValidationError(
                    type="Wrong output",
                    data={
                        "expected": gold_standard.output,
                        "actual": outcome.output
                    },
                )

            # Difftest the output files.
            for path in output_paths:
                if not path.is_file():
                    return ValidationError(
                        type="Output not generated",
                        data={
                            "path": path.name,
                            "command": cmd
                        },
                    )
                diff = subprocess.Popen(
                    ["diff", str(path), f"{path}.gold_standard"],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                )
                stdout, _ = diff.communicate()
                if diff.returncode:
                    try:
                        stdout = stdout.decode("utf-8")
                        return ValidationError(
                            type="Wrong output (file)",
                            data={
                                "path": path.name,
                                "diff": stdout
                            },
                        )
                    except UnicodeDecodeError:
                        return ValidationError(
                            type="Wrong output (file)",
                            data={
                                "path": path.name,
                                "diff": "<binary>"
                            },
                        )
Example #15
import sys
import tempfile
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Callable, Dict, List, NamedTuple, Optional

import fasteners

from compiler_gym.datasets.dataset import Dataset
from compiler_gym.util.download import download
from compiler_gym.util.runfiles_path import cache_path, runfiles_path, site_data_path
from compiler_gym.util.timer import Timer

_CLANG = runfiles_path("CompilerGym/compiler_gym/third_party/llvm/clang")

_CBENCH_DATA = site_data_path("llvm/cBench-v0-runtime-data/runtime_data")
_CBENCH_DATA_URL = (
    "https://dl.fbaipublicfiles.com/compiler_gym/cBench-v0-runtime-data.tar.bz2"
)
_CBENCH_DATA_SHA256 = "a1b5b5d6b115e5809ccaefc2134434494271d184da67e2ee43d7f84d07329055"

if sys.platform == "darwin":
    _COMPILE_ARGS = [
        "-L",
        "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib",
    ]
else:
    _COMPILE_ARGS = []

LLVM_DATASETS = [
    Dataset(
Example #16
def validator(
    benchmark: str,
    cmd: str,
    data: Optional[List[str]] = None,
    outs: Optional[List[str]] = None,
    platforms: Optional[List[str]] = None,
    compare_output: bool = True,
    validate_result: Optional[Callable[[BenchmarkExecutionResult],
                                       Optional[str]]] = None,
    linkopts: Optional[List[str]] = None,
    env: Optional[Dict[str, str]] = None,
    pre_execution_callback: Optional[Callable[[], None]] = None,
    sanitizers: Optional[List[LlvmSanitizer]] = None,
) -> bool:
    """Declare a new benchmark validator.

    TODO(cummins): Pull this out into a public API.

    :param benchmark: The name of the benchmark that this validator supports.
    :param cmd: The shell command to run the validation. Variable substitution
        is applied to this value as follows: :code:`$BIN` is replaced by the
        path of the compiled binary and :code:`$D` is replaced with the path to
        the benchmark's runtime data directory.
    :param data: A list of paths to input files.
    :param outs: A list of paths to output files.
    :return: :code:`True` if the new validator was registered, else :code:`False`.
    """
    platforms = platforms or ["linux", "macos"]
    if {"darwin": "macos"}.get(sys.platform, sys.platform) not in platforms:
        return False
    infiles = data or []
    outfiles = [Path(p) for p in outs or []]
    linkopts = linkopts or []
    env = env or {}
    if sanitizers is None:
        sanitizers = LlvmSanitizer

    VALIDATORS[benchmark].append(
        _make_cBench_validator(
            cmd=cmd,
            input_files=infiles,
            output_files=outfiles,
            compare_output=compare_output,
            validate_result=validate_result,
            linkopts=linkopts,
            os_env=env,
            pre_execution_callback=pre_execution_callback,
        ))

    # Register additional validators using the sanitizers.
    if sys.platform.startswith("linux"):
        for sanitizer in sanitizers:
            VALIDATORS[benchmark].append(
                _make_cBench_validator(
                    cmd=cmd,
                    input_files=infiles,
                    output_files=outfiles,
                    compare_output=compare_output,
                    validate_result=validate_result,
                    linkopts=linkopts,
                    os_env=env,
                    pre_execution_callback=pre_execution_callback,
                    sanitizer=sanitizer,
                ))

    # Create the BenchmarkDynamicConfig object.
    cbench_data = site_data_path("llvm-v0/cbench-v1-runtime-data/runtime_data")
    DYNAMIC_CONFIGS[benchmark] = BenchmarkDynamicConfig(
        build_cmd=Command(
            argument=["$CC", "$IN"] + linkopts,
            timeout_seconds=60,
            outfile=["a.out"],
        ),
        run_cmd=Command(
            argument=cmd.replace("$BIN", "./a.out")
            .replace("$D", str(cbench_data))
            .split(),
            timeout_seconds=300,
            infile=["a.out", "_finfo_dataset"],
            outfile=[str(s) for s in outfiles],
        ),
        pre_run_cmd=[
            Command(argument=["echo", "1", ">_finfo_dataset"],
                    timeout_seconds=30),
        ],
    )

    return True
Example #17
def get_llvm_datasets(
        site_data_base: Optional[Path] = None) -> Iterable[Dataset]:
    """Instantiate the builtin LLVM datasets.

    :param site_data_base: The root of the site data path.

    :return: An iterable sequence of :class:`Dataset
        <compiler_gym.datasets.Dataset>` instances.
    """
    site_data_base = site_data_base or site_data_path("llvm-v0")

    yield AnghaBenchDataset(site_data_base=site_data_base, sort_order=0)
    # Add legacy version of Anghabench using an old manifest.
    anghabench_v0_manifest_url, anghabench_v0_manifest_sha256 = {
        "darwin": (
            "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2",
            "39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1",
        ),
        "linux": (
            "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
            "a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
        ),
    }[sys.platform]
    yield AnghaBenchDataset(
        name="benchmark://anghabench-v0",
        site_data_base=site_data_base,
        sort_order=0,
        manifest_url=anghabench_v0_manifest_url,
        manifest_sha256=anghabench_v0_manifest_sha256,
        deprecated="Please use anghabench-v1",
    )
    yield BlasDataset(site_data_base=site_data_base, sort_order=0)
    yield CLgenDataset(site_data_base=site_data_base, sort_order=0)
    yield CBenchDataset(site_data_base=site_data_base)
    # Add legacy version of cbench-v1 in which the 'b' was capitalized. This
    # is deprecated and will be removed no earlier than v0.1.10.
    yield CBenchLegacyDataset2(
        site_data_base=site_data_base,
        name="benchmark://cBench-v1",
        deprecated=(
            "Please use 'benchmark://cbench-v1' (note the lowercase name). "
            "The dataset is the same, only the name has changed"),
        manifest_url=
        "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cBench-v1-manifest.bz2",
        manifest_sha256=
        "635b94eeb2784dfedb3b53fd8f84517c3b4b95d851ddb662d4c1058c72dc81e0",
        sort_order=100,
    )
    yield CBenchLegacyDataset(site_data_base=site_data_base)
    yield CHStoneDataset(site_data_base=site_data_base)
    yield CsmithDataset(site_data_base=site_data_base, sort_order=0)
    yield GitHubDataset(site_data_base=site_data_base, sort_order=0)
    yield LinuxDataset(site_data_base=site_data_base, sort_order=0)
    yield LlvmStressDataset(site_data_base=site_data_base, sort_order=0)
    yield MibenchDataset(site_data_base=site_data_base, sort_order=0)
    yield MibenchV0Dataset(site_data_base=site_data_base, sort_order=100)
    yield NPBDataset(site_data_base=site_data_base, sort_order=0)
    yield OpenCVDataset(site_data_base=site_data_base, sort_order=0)
    yield POJ104Dataset(site_data_base=site_data_base, sort_order=0)
    yield POJ104LegacyDataset(site_data_base=site_data_base, sort_order=100)
    yield TensorFlowDataset(site_data_base=site_data_base, sort_order=0)