Пример #1
0
    def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
        """Resolve a parsed URI to a benchmark.

        Yields the matching :class:`Benchmark
        <compiler_gym.datasets.Benchmark>`, regardless of whether the containing
        dataset is installed or deprecated.

        URIs using the ``proto`` or ``file`` scheme are read from the local
        path formed by joining the dataset and path components of the URI. All
        other schemes are forwarded to the dataset that owns the URI.

        :param uri: The parsed URI of the benchmark to return.

        :return: A :class:`Benchmark <compiler_gym.datasets.Benchmark>`
            instance.

        :raises FileNotFoundError: If a ``proto`` or ``file`` URI does not
            refer to an existing file.
        """
        scheme = uri.scheme

        if scheme == "proto" or scheme == "file":
            # Both local schemes resolve their location the same way.
            local_path = Path(os.path.normpath(f"{uri.dataset}/{uri.path}"))
            if not local_path.is_file():
                raise FileNotFoundError(str(local_path))

            if scheme == "file":
                return Benchmark.from_file(uri=uri, path=local_path)

            # proto: the file holds a serialized BenchmarkProto message.
            message = BenchmarkProto()
            message.ParseFromString(local_path.read_bytes())
            return Benchmark(proto=message)

        owner = self.dataset_from_parsed_uri(uri)
        return owner.benchmark_from_parsed_uri(uri)
def make_benchmark_of_size(size_in_bytes: int, target: int = 0) -> Benchmark:
    """Test helper. Generate a benchmark of the given size in bytes.

    The value reported by ``ByteSize()`` differs from the length of the
    program contents, so the contents length is adjusted step by step until
    the benchmark reports exactly the requested size.

    :param size_in_bytes: The required ``ByteSize()`` of the benchmark.
    :param target: The contents length to try first. Zero (the default) means
        start from :code:`size_in_bytes`.
    :return: A benchmark whose ``ByteSize()`` equals :code:`size_in_bytes`.
    :raises ValueError: If the search revisits a contents length, meaning no
        contents length yields the requested size.
    """
    target = target or size_in_bytes
    attempted = set()
    while target not in attempted:
        attempted.add(target)
        bm = Benchmark(program=File(contents=("." * target).encode("utf-8")))
        size_offset = bm.ByteSize() - size_in_bytes
        if not size_offset:
            return bm
        # Correct relative to the length that was just tried (not relative to
        # the requested size), so that repeated corrections converge. A
        # negative length would produce the same empty contents as zero.
        target = max(target - size_offset, 0)
    raise ValueError(
        f"Cannot generate a benchmark of exactly {size_in_bytes} bytes"
    )
Пример #3
0
def test_dataset_proto_scheme(tmpdir):
    """Check that a proto:// URI loads a serialized benchmark from disk."""
    tmpdir = Path(tmpdir)
    datasets = Datasets(datasets={})

    # Serialize a benchmark message into the temporary directory.
    serialized = BenchmarkProto(uri="hello world").SerializeToString()
    (tmpdir / "file.pb").write_bytes(serialized)

    loaded = datasets.benchmark(f"proto://{tmpdir}/file.pb")

    assert loaded.proto.uri == "hello world"
    assert loaded.uri == "benchmark://hello world"
Пример #4
0
def test_invalid_benchmark_missing_file(env: CompilerEnv):
    """Resetting with a benchmark that has no program raises ValueError."""
    programless = Benchmark(uri="benchmark://new")

    with pytest.raises(ValueError) as excinfo:
        env.reset(benchmark=programless)

    message = str(excinfo.value)
    assert message == "No program set"
Пример #5
0
    def require_datasets(self, datasets: List[Union[str, Dataset]]) -> None:
        """Make every dataset in a list available to the environment.

        Example usage:

            >>> env = gym.make("llvm-v0")
            >>> env.require_datasets(["npb-v0"])
            >>> env.benchmarks
            ["npb-v0/1", "npb-v0/2", ...]

        Each entry is passed in turn to
        :meth:`require(self, dataset) <compiler_gym.datasets.require>`. If any
        of those calls reports a change, the compiler service is asked to
        rescan the site data and the manifest file is regenerated.

        :param datasets: A list of datasets to require. Each dataset is the name
            of an available dataset, the URL of a dataset to download, or a
            :class:`Dataset` instance.
        """
        installed_any = False
        for item in datasets:
            installed_any |= require(self, item)

        if not installed_any:
            # Nothing changed, so there is nothing to tell the service about.
            return

        # Signal to the compiler service that the contents of the site data
        # directory has changed.
        rescan_request = AddBenchmarkRequest(
            benchmark=[Benchmark(uri="service://scan-site-data")]
        )
        self.service(self.service.stub.AddBenchmark, rescan_request)
        self.make_manifest_file()
    def __setitem__(self, uri: str, benchmark: Benchmark):
        """Store a benchmark in the cache under the given URI.

        An existing entry for the same URI is dropped first so that the
        tracked byte count stays accurate. When adding the benchmark would
        push the cache past its maximum size, :code:`evict_to_capacity()` is
        called before the benchmark is inserted.

        :param uri: The key to store the benchmark under.
        :param benchmark: The benchmark to cache.
        """
        # Drop a stale entry, if any, and give back the bytes it accounted for.
        if uri in self._benchmarks:
            stale_size = self._benchmarks[uri].ByteSize()
            del self._benchmarks[uri]
            self._size_in_bytes -= stale_size

        incoming_size = benchmark.ByteSize()
        if self.size_in_bytes + incoming_size > self.max_size_in_bytes:
            if incoming_size <= self.max_size_in_bytes:
                logger.debug(
                    "Adding new benchmark with size %d bytes "
                    "exceeds maximum size %d bytes, %d items",
                    incoming_size,
                    self.max_size_in_bytes,
                    self.size,
                )
            else:
                # This single benchmark is bigger than the whole cache budget.
                logger.warning(
                    "Adding new benchmark with size %d bytes exceeds total "
                    "target cache size of %d bytes",
                    incoming_size,
                    self.max_size_in_bytes,
                )
            self.evict_to_capacity()

        self._benchmarks[uri] = benchmark
        self._size_in_bytes += incoming_size

        logger.debug(
            "Cached benchmark %s. Cache size = %d bytes, %d items",
            uri,
            self.size_in_bytes,
            self.size,
        )
Пример #7
0
def test_add_benchmark_invalid_path(env: CompilerEnv):
    """Resetting with a file URI that was never created raises an error."""
    with tempfile.TemporaryDirectory() as scratch:
        missing = Path(scratch) / "not_a_file"
        benchmark = Benchmark(
            uri="benchmark://foo",
            program=File(uri=f"file:///{missing}"),
        )
        with pytest.raises(FileNotFoundError) as excinfo:
            env.reset(benchmark=benchmark)
        assert str(excinfo.value) == f'File not found: "{missing}"'
Пример #8
0
def test_add_benchmark_invalid_protocol(env: CompilerEnv):
    """Resetting with an https:// program URI raises ValueError."""
    benchmark = Benchmark(
        uri="benchmark://foo",
        program=File(uri="https://invalid/protocol"),
    )
    with pytest.raises(ValueError) as excinfo:
        env.reset(benchmark=benchmark)

    expected = 'Unsupported benchmark URI protocol: "https://invalid/protocol"'
    assert str(excinfo.value) == expected
Пример #9
0
def test_benchmark_path_invalid_protocol(env: CompilerEnv):
    """Resetting with an unknown program URI scheme raises ValueError."""
    program = File(uri="invalid_protocol://test")
    benchmark = Benchmark(uri="benchmark://new", program=program)

    with pytest.raises(ValueError) as excinfo:
        env.reset(benchmark=benchmark)

    expected = 'Unsupported benchmark URI protocol: "invalid_protocol://test"'
    assert str(excinfo.value) == expected
Пример #10
0
def test_invalid_benchmark_data(env: CompilerEnv):
    """Resetting with inline contents that are not bitcode raises ValueError."""
    payload = "Invalid bitcode".encode("utf-8")
    benchmark = Benchmark(uri="benchmark://new", program=File(contents=payload))

    with pytest.raises(ValueError) as excinfo:
        env.reset(benchmark=benchmark)

    expected = 'Failed to parse LLVM bitcode: "benchmark://new"'
    assert str(excinfo.value) == expected
Пример #11
0
def benchmark_from_flags() -> Optional[Union[Benchmark, str]]:
    """Resolve the benchmark selected through the command line flags.

    :code:`FLAGS.benchmark` takes precedence and is returned unchanged. If it
    is unset, :code:`FLAGS.program_data` is wrapped in a Benchmark message
    that uses the value as both the benchmark URI and the program URI.

    :return: The benchmark name, a Benchmark message, or :code:`None` if
        neither flag was set.
    """
    if FLAGS.benchmark:
        return FLAGS.benchmark

    if not FLAGS.program_data:
        # No benchmark was specified.
        return None

    return Benchmark(uri=FLAGS.program_data, program=File(uri=FLAGS.program_data))
Пример #12
0
def test_benchmark_path_not_found(env: CompilerEnv):
    """Resetting with a file URI that does not exist raises an error."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        program = File(uri=f"file:///{tmpdir}/not_found")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(FileNotFoundError) as excinfo:
            env.reset(benchmark=benchmark)

    expected = f'File not found: "{tmpdir}/not_found"'
    assert str(excinfo.value) == expected
Пример #13
0
def test_benchmark_path_empty_file(env: CompilerEnv):
    """Resetting with a file URI that points to an empty file raises ValueError."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Create the file without writing anything to it.
        (tmpdir / "test.bc").touch()

        program = File(uri=f"file:///{tmpdir}/test.bc")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(ValueError) as excinfo:
            env.reset(benchmark=benchmark)

    expected = f'File is empty: "{tmpdir}/test.bc"'
    assert str(excinfo.value) == expected
Пример #14
0
def test_invalid_benchmark_path_contents(env: CompilerEnv):
    """Resetting with a file that does not hold bitcode raises ValueError."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # The file exists and is non-empty, but its contents are plain text.
        (tmpdir / "test.bc").write_text("Invalid bitcode")

        program = File(uri=f"file:///{tmpdir}/test.bc")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(ValueError) as excinfo:
            env.reset(benchmark=benchmark)

    expected = 'Failed to parse LLVM bitcode: "benchmark://new"'
    assert str(excinfo.value) == expected
Пример #15
0
def test_custom_benchmark(env: LlvmEnv):
    """After a reset with a user benchmark, the environment reports its URI."""
    program = File(uri=f"file:///{EXAMPLE_BITCODE_FILE}")
    custom = Benchmark(uri="benchmark://new", program=program)

    env.reset(benchmark=custom)

    assert env.benchmark == "benchmark://new"
Пример #16
0
def make_benchmark(
    inputs: Union[str, Path, ClangInvocation, List[Union[str, Path, ClangInvocation]]],
    copt: Optional[List[str]] = None,
    system_includes: bool = True,
    timeout: int = 600,
) -> Benchmark:
    """Create a benchmark for use by LLVM environments.

    This function takes one or more inputs and uses them to create a benchmark
    that can be passed to :meth:`compiler_gym.envs.LlvmEnv.reset`.

    For single-source C/C++ programs, you can pass the path of the source file:

    >>> benchmark = make_benchmark('my_app.c')
    >>> env = gym.make("llvm-v0")
    >>> env.reset(benchmark=benchmark)

    The clang invocation used is roughly equivalent to:

    .. code-block::

        $ clang my_app.c -O0 -c -emit-llvm -o benchmark.bc

    Additional compile-time arguments to clang can be provided using the
    :code:`copt` argument:

    >>> benchmark = make_benchmark('/path/to/my_app.cpp', copt=['-O2'])

    If you need more fine-grained control over the options, you can directly
    construct a :class:`ClangInvocation <compiler_gym.envs.llvm.ClangInvocation>`
    to pass a list of arguments to clang:

    >>> benchmark = make_benchmark(
        ClangInvocation(['/path/to/my_app.c'], timeout=10)
    )

    For multi-file programs, pass a list of inputs that will be compiled
    separately and then linked to a single module:

    >>> benchmark = make_benchmark([
        'main.c',
        'lib.cpp',
        'lib2.bc',
    ])

    If you already have prepared bitcode files, those can be linked and used
    directly:

    >>> benchmark = make_benchmark([
        'bitcode1.bc',
        'bitcode2.bc',
    ])

    .. note::
        LLVM bitcode compatibility is
        `not guaranteed <https://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility>`_,
        so you must ensure that any precompiled bitcodes are compatible with the
        LLVM version used by CompilerGym, which can be queried using
        :func:`LlvmEnv.compiler_version <compiler_gym.envs.CompilerEnv.compiler_version>`.

    :param inputs: An input, or list of inputs.
    :param copt: A list of command line options to pass to clang when compiling
        source files.
    :param system_includes: Whether to include the system standard libraries
        during compilation jobs. This requires a system toolchain. See
        :func:`get_system_includes`.
    :param timeout: The maximum number of seconds to allow clang to run before
        terminating.
    :return: A :code:`Benchmark` message.
    :raises FileNotFoundError: If any input sources are not found.
    :raises TypeError: If the inputs are of unsupported types.
    :raises ValueError: If an input path has an unrecognized file extension,
        or if no inputs are given.
    :raises OSError: If a compilation job fails.
    :raises TimeoutExpired: If a compilation job exceeds :code:`timeout` seconds.
    """
    copt = copt or []

    bitcodes: List[Path] = []
    clang_jobs: List[ClangInvocation] = []

    def _add_path(path: Path):
        """Register a path as a pre-compiled bitcode or as a clang job."""
        # NOTE(cummins): There is some discussion about the best way to create
        # a bitcode that is unoptimized yet does not hinder downstream
        # optimization opportunities. Here we are using a configuration based
        # on -O0, yet there is a suggestion that an optimized configuration
        # can produce better results if the optimizations themselves are
        # explicitly disabled, as in: ["-Oz", "-Xclang", "-disable-llvm-optzns"]
        # See: https://lists.llvm.org/pipermail/llvm-dev/2018-August/thread.html#125365
        DEFAULT_COPT = [
            "-O",
            "-Xclang",
            "-disable-O0-optnone",
            "-Xclang",
            "-disable-llvm-passes",
        ]

        if not path.is_file():
            raise FileNotFoundError(path)

        if path.suffix == ".bc":
            bitcodes.append(path)
        elif path.suffix in {".c", ".cxx", ".cpp", ".cc"}:
            clang_jobs.append(
                ClangInvocation(
                    [str(path)] + DEFAULT_COPT + copt,
                    system_includes=system_includes,
                    timeout=timeout,
                )
            )
        else:
            raise ValueError(f"Unrecognized file type: {path.name}")

    def _add_input(item):
        """Register a single input of any supported type."""
        if isinstance(item, (str, Path)):
            _add_path(Path(item))
        elif isinstance(item, ClangInvocation):
            clang_jobs.append(item)
        else:
            raise TypeError(f"Invalid input type: {type(item).__name__}")

    # Determine from inputs the list of pre-compiled bitcodes and the clang
    # invocations required to compile the bitcodes. A single input is handled
    # as a list of one.
    if isinstance(inputs, (str, Path, ClangInvocation)):
        inputs = [inputs]
    for item in inputs:
        _add_input(item)

    if not bitcodes and not clang_jobs:
        raise ValueError("No inputs")

    # Shortcut if we only have a single pre-compiled bitcode.
    if len(bitcodes) == 1 and not clang_jobs:
        bitcode = bitcodes[0]
        return Benchmark(
            uri=f"file:///{bitcode}", program=File(uri=f"file:///{bitcode}")
        )

    with tempfile.TemporaryDirectory(dir=cache_path(".")) as d:
        working_dir = Path(d)

        # Run the clang invocations in parallel.
        clang_outs = [
            working_dir / f"out-{i}.bc" for i in range(1, len(clang_jobs) + 1)
        ]
        clang_cmds = [
            (job.command(out), job.timeout) for job, out in zip(clang_jobs, clang_outs)
        ]
        with multiprocessing.Pool() as pool:
            list(pool.imap_unordered(_run_command, clang_cmds))

        # Check that the expected files were generated. Each entry of
        # clang_cmds is a (command, timeout) pair, so only the command part is
        # used for the error message.
        for (command, _), out in zip(clang_cmds, clang_outs):
            if not out.is_file():
                command_str = " ".join(str(arg) for arg in command)
                raise OSError(
                    f"Clang invocation failed to produce a file: {command_str}"
                )

        all_bitcodes = bitcodes + clang_outs
        if len(all_bitcodes) > 1:
            # Link all of the bitcodes into a single module.
            llvm_link_cmd = [str(LLVM_LINK), "-o", "-"] + [
                str(path) for path in all_bitcodes
            ]
            llvm_link = subprocess.Popen(
                llvm_link_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            bitcode, stderr = _communicate(llvm_link, timeout=timeout)
            if llvm_link.returncode:
                raise OSError(
                    f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
                )
        else:
            # We only have a single bitcode so read it.
            with open(str(all_bitcodes[0]), "rb") as f:
                bitcode = f.read()

    # Name the benchmark using the current date and time plus a random suffix,
    # so that benchmarks created within the same second are unlikely to collide.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    suffix = f"{random.randrange(16**4):04x}"
    return Benchmark(
        uri=f"benchmark://user/{timestamp}-{suffix}", program=File(contents=bitcode)
    )