示例#1
0
def test_add_benchmark_invalid_protocol(env: CompilerEnv):
    """Check that an unsupported program URI protocol is rejected on reset.

    The benchmark's program points at an ``https://`` URI. ``env.reset()`` is
    expected to raise a ValueError whose message quotes that URI.
    """
    program = File(uri="https://invalid/protocol")
    benchmark = Benchmark(uri="benchmark://foo", program=program)
    expected_error = (
        'Unsupported benchmark URI protocol: "https://invalid/protocol"')

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == expected_error
示例#2
0
def test_add_benchmark_invalid_path(env: CompilerEnv):
    """Check that a ``file:///`` program URI to a missing file is rejected.

    A path that is never created inside a fresh temporary directory is used as
    the program URI. ``env.reset()`` is expected to raise FileNotFoundError
    with a message that quotes the missing path.
    """
    with tempfile.TemporaryDirectory() as d:
        tmp = Path(d) / "not_a_file"
        program = File(uri=f"file:///{tmp}")
        benchmark = Benchmark(uri="benchmark://foo", program=program)
        expected_error = f'File not found: "{tmp}"'

        with pytest.raises(FileNotFoundError) as ctx:
            env.reset(benchmark=benchmark)

        assert str(ctx.value) == expected_error
def make_benchmark_of_size(size_in_bytes: int, target: int = 0) -> Benchmark:
    """Test helper. Build a benchmark whose ``ByteSize()`` equals a given size.

    A benchmark is created whose program contents are ``target`` ``"."``
    bytes. If ``ByteSize()`` of the result differs from ``size_in_bytes``, the
    helper calls itself again with the payload length corrected by the
    observed difference.

    :param size_in_bytes: The desired value of ``ByteSize()`` for the result.

    :param target: Payload length to try. A falsy value (the default, ``0``)
        means "start from ``size_in_bytes``".

    :return: A benchmark for which ``ByteSize() == size_in_bytes``.
    """
    payload_length = target if target else size_in_bytes
    payload = ("." * payload_length).encode("utf-8")
    benchmark = Benchmark(program=File(contents=payload))

    excess = benchmark.ByteSize() - size_in_bytes
    if not excess:
        return benchmark

    # The message is larger or smaller than requested; retry with the payload
    # length adjusted by the measured difference.
    return make_benchmark_of_size(size_in_bytes, size_in_bytes - excess)
示例#4
0
def test_add_benchmark_invalid_protocol(env: CompilerEnv):
    """Check that a non-``file:///`` program URI is rejected on reset.

    The benchmark wraps a BenchmarkProto whose program points at an
    ``https://`` URI. ``env.reset()`` is expected to raise a ValueError whose
    message quotes that URI.
    """
    proto = BenchmarkProto(uri="benchmark://foo",
                           program=File(uri="https://invalid/protocol"))
    benchmark = Benchmark(proto)
    expected_error = (
        "Invalid benchmark data URI. "
        'Only the file:/// protocol is supported: "https://invalid/protocol"')

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == expected_error
示例#5
0
def test_benchmark_path_invalid_protocol(env: CompilerEnv):
    """Check that an unknown URI scheme for the program is rejected on reset.

    ``env.reset()`` is expected to raise a ValueError whose message quotes the
    offending ``invalid_protocol://`` URI.
    """
    program = File(uri="invalid_protocol://test")
    benchmark = Benchmark(uri="benchmark://new", program=program)
    expected_error = (
        'Unsupported benchmark URI protocol: "invalid_protocol://test"')

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == expected_error
示例#6
0
def test_invalid_benchmark_data(env: CompilerEnv):
    """Check that inline program contents that are not bitcode are rejected.

    The program is supplied as raw bytes rather than a URI. ``env.reset()`` is
    expected to raise a ValueError whose message quotes the benchmark URI.
    """
    program = File(contents="Invalid bitcode".encode("utf-8"))
    benchmark = Benchmark(uri="benchmark://new", program=program)
    expected_error = 'Failed to parse LLVM bitcode: "benchmark://new"'

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == expected_error
def benchmark_from_flags() -> Optional[Union[Benchmark, str]]:
    """Resolve the benchmark selected through command line flags.

    ``FLAGS.benchmark`` takes precedence and is returned unchanged. Otherwise,
    if ``FLAGS.program_data`` is set, a Benchmark message is built that uses
    that value both as the benchmark URI and as the program URI.

    :return: The value of ``FLAGS.benchmark``, a Benchmark message built from
        ``FLAGS.program_data``, or ``None`` if neither flag is set.
    """
    if FLAGS.benchmark:
        return FLAGS.benchmark

    if FLAGS.program_data:
        program_uri = FLAGS.program_data
        return Benchmark(uri=program_uri, program=File(uri=program_uri))

    # No benchmark was specified.
    return None
示例#8
0
    def from_file_contents(cls, uri: str, data: bytes):
        """Construct a benchmark from raw data.

        The bytes are embedded directly in the ``contents`` field of the
        program File message.

        :param uri: The URI of the benchmark.

        :param data: An array of bytes that will be passed to the compiler
            service.

        :return: An instance of ``cls`` wrapping the new BenchmarkProto.
        """
        program = File(contents=data)
        proto = BenchmarkProto(uri=uri, program=program)
        return cls(proto=proto)
示例#9
0
def test_benchmark_path_not_found(env: CompilerEnv):
    """Check that a ``file:///`` program URI to a missing file is rejected.

    ``env.reset()`` is expected to raise FileNotFoundError with a message that
    quotes the missing path. The message is checked after the temporary
    directory has been cleaned up; only the captured exception is inspected.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        program = File(uri=f"file:///{tmpdir}/not_found")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(FileNotFoundError) as ctx:
            env.reset(benchmark=benchmark)

    expected_error = f'File not found: "{tmpdir}/not_found"'
    assert str(ctx.value) == expected_error
示例#10
0
def test_benchmark_path_empty_file(env: CompilerEnv):
    """Check that a ``file:///`` program URI to a zero-byte file is rejected.

    An empty ``test.bc`` is created in a temporary directory and used as the
    program. ``env.reset()`` is expected to raise a ValueError with a message
    that quotes the file path.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        empty_file = tmpdir / "test.bc"
        empty_file.touch()

        program = File(uri=f"file:///{tmpdir}/test.bc")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(ValueError) as ctx:
            env.reset(benchmark=benchmark)

    expected_error = f'File is empty: "{tmpdir}/test.bc"'
    assert str(ctx.value) == expected_error
def test_benchmark_path_invalid_protocol(env: LlvmEnv):
    """Check that an unknown URI scheme for the program is rejected on reset.

    The expected message is passed to ``pytest.raises`` through ``match``, so
    it is applied as a regular expression search against the error text.
    """
    proto = BenchmarkProto(uri="benchmark://new",
                           program=File(uri="invalid_protocol://test"))
    benchmark = Benchmark(proto)
    expected_pattern = (
        "Invalid benchmark data URI. "
        'Only the file:/// protocol is supported: "invalid_protocol://test"')

    with pytest.raises(ValueError, match=expected_pattern):
        env.reset(benchmark=benchmark)
示例#12
0
def test_invalid_benchmark_path_contents(env: CompilerEnv):
    """Check that a program file that does not contain bitcode is rejected.

    A ``test.bc`` holding plain text is written to a temporary directory and
    used as the program. ``env.reset()`` is expected to raise a ValueError
    whose message quotes the benchmark URI.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        (tmpdir / "test.bc").write_text("Invalid bitcode")

        program = File(uri=f"file:///{tmpdir}/test.bc")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(ValueError) as ctx:
            env.reset(benchmark=benchmark)

    expected_error = 'Failed to parse LLVM bitcode: "benchmark://new"'
    assert str(ctx.value) == expected_error
示例#13
0
    def from_file(cls, uri: str, path: Path):
        """Construct a benchmark from a file.

        The file is read eagerly and its bytes are embedded in the benchmark.

        :param uri: The URI of the benchmark.

        :param path: A filesystem path.

        :raise FileNotFoundError: If the path does not exist or is not a
            regular file.

        :return: A :class:`Benchmark <compiler_gym.datasets.Benchmark>`
            instance.
        """
        path = Path(path)
        if not path.is_file():
            raise FileNotFoundError(path)

        # The file data is read into memory and embedded inside the File
        # protocol buffer. Storing only the path in the File.uri field would
        # be an alternative, but that won't work for distributed services
        # which don't share a filesystem.
        program = File(contents=path.read_bytes())
        return cls(proto=BenchmarkProto(uri=uri, program=program))
示例#14
0
    def __init__(self, invocation: GccInvocation, bitcode: bytes, timeout: int):
        """Create a benchmark from a compiler invocation and its bitcode.

        The benchmark URI is derived from the URL-quoted original command
        line. The build command stored in the benchmark's dynamic config is
        the original command line with the source files removed, object file
        inputs made absolute, any existing ``-o <path>`` pair stripped, and
        ``-xir $IN -o <output_path>`` appended.

        :param invocation: The parsed compiler command line. Its
            ``original_argv``, ``sources`` and ``output_path`` are read.

        :param bitcode: The bitcode to embed as the benchmark program.

        :param timeout: Stored as the build command's ``timeout_seconds``.
        """
        uri = f"benchmark://clang-v0/{urllib.parse.quote_plus(join_cmd(invocation.original_argv))}"
        program = File(contents=bitcode)
        super().__init__(proto=BenchmarkProto(uri=uri, program=program))
        self.command_line = invocation.original_argv

        # Rewrite the command line so that it takes the bitcode file as input:
        # the original sources are dropped, but object file inputs are kept.
        source_files = {s for s in invocation.sources if not s.endswith(".o")}

        # Object file inputs become absolute paths since the backend service
        # will have a different working directory.
        #
        # TODO(github.com/facebookresearch/CompilerGym/issues/325): To support
        # distributed execution, we should embed the contents of these object
        # files in the benchmark proto.
        object_files = {s for s in invocation.sources if s.endswith(".o")}
        build_command = [
            os.path.abspath(arg) if arg in object_files else arg
            for arg in invocation.original_argv
            if arg not in source_files
        ]

        # Drop every existing "-o <path>" pair. Walking from the back keeps the
        # indices still to be visited valid while elements are deleted.
        for idx in reversed(range(len(build_command) - 1)):
            if build_command[idx] == "-o":
                del build_command[idx + 1]
                del build_command[idx]

        # Append the new source to the build command and specify the absolute
        # path to the output.
        output_path = str(invocation.output_path)
        build_command.extend(["-xir", "$IN", "-o", output_path])

        build_cmd = self.proto.dynamic_config.build_cmd
        build_cmd.argument[:] = build_command
        build_cmd.outfile[:] = [output_path]
        build_cmd.timeout_seconds = timeout
示例#15
0
def test_custom_benchmark(env: LlvmEnv):
    """Check that the env reports the URI of a user-supplied benchmark.

    The benchmark's program is the example bitcode file, referenced through a
    ``file:///`` URI.
    """
    program = File(uri=f"file:///{EXAMPLE_BITCODE_FILE}")
    custom_benchmark = Benchmark(uri="benchmark://new", program=program)

    env.reset(benchmark=custom_benchmark)

    assert env.benchmark == "benchmark://new"
示例#16
0
def make_benchmark(
    inputs: Union[str, Path, ClangInvocation, List[Union[str, Path, ClangInvocation]]],
    copt: Optional[List[str]] = None,
    system_includes: bool = True,
    timeout: int = 600,
) -> Benchmark:
    """Create a benchmark for use by LLVM environments.

    This function takes one or more inputs and uses them to create a benchmark
    that can be passed to :meth:`compiler_gym.envs.LlvmEnv.reset`.

    For single-source C/C++ programs, you can pass the path of the source file:

    >>> benchmark = make_benchmark('my_app.c')
    >>> env = gym.make("llvm-v0")
    >>> env.reset(benchmark=benchmark)

    The clang invocation used is roughly equivalent to:

    .. code-block::

        $ clang my_app.c -O0 -c -emit-llvm -o benchmark.bc

    Additional compile-time arguments to clang can be provided using the
    :code:`copt` argument:

    >>> benchmark = make_benchmark('/path/to/my_app.cpp', copt=['-O2'])

    If you need more fine-grained control over the options, you can directly
    construct a :class:`ClangInvocation <compiler_gym.envs.llvm.ClangInvocation>`
    to pass a list of arguments to clang:

    >>> benchmark = make_benchmark(
        ClangInvocation(['/path/to/my_app.c'], timeout=10)
    )

    For multi-file programs, pass a list of inputs that will be compiled
    separately and then linked to a single module:

    >>> benchmark = make_benchmark([
        'main.c',
        'lib.cpp',
        'lib2.bc',
    ])

    If you already have prepared bitcode files, those can be linked and used
    directly:

    >>> benchmark = make_benchmark([
        'bitcode1.bc',
        'bitcode2.bc',
    ])

    .. note::
        LLVM bitcode compatibility is
        `not guaranteed <https://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility>`_,
        so you must ensure that any precompiled bitcodes are compatible with the
        LLVM version used by CompilerGym, which can be queried using
        :func:`LlvmEnv.compiler_version <compiler_gym.envs.CompilerEnv.compiler_version>`.

    :param inputs: An input, or list of inputs.
    :param copt: A list of command line options to pass to clang when compiling
        source files.
    :param system_includes: Whether to include the system standard libraries
        during compilation jobs. This requires a system toolchain. See
        :func:`get_system_includes`.
    :param timeout: The maximum number of seconds to allow clang to run before
        terminating.
    :return: A :code:`Benchmark` message.
    :raises FileNotFoundError: If any input sources are not found.
    :raises TypeError: If the inputs are of unsupported types.
    :raises ValueError: If no inputs are given, or if an input path has an
        unrecognized file extension.
    :raises OSError: If a compilation job fails.
    :raises TimeoutExpired: If a compilation job exceeds :code:`timeout` seconds.
    """
    copt = copt or []

    bitcodes: List[Path] = []
    clang_jobs: List[ClangInvocation] = []

    # NOTE(cummins): There is some discussion about the best way to create
    # a bitcode that is unoptimized yet does not hinder downstream
    # optimization opportunities. Here we are using a configuration based
    # on -O0, yet there is a suggestion that an optimized configuration
    # can produce better results if the optimizations themselves are
    # explicitly disabled, as in: ["-Oz", "-Xclang", "-disable-llvm-optzns"]
    # See: https://lists.llvm.org/pipermail/llvm-dev/2018-August/thread.html#125365
    DEFAULT_COPT = [
        "-O",
        "-Xclang",
        "-disable-O0-optnone",
        "-Xclang",
        "-disable-llvm-passes",
    ]

    def _add_path(path: Path):
        """Classify a path as a pre-compiled bitcode or a source to compile."""
        if not path.is_file():
            raise FileNotFoundError(path)

        if path.suffix == ".bc":
            bitcodes.append(path)
        elif path.suffix in {".c", ".cxx", ".cpp", ".cc"}:
            clang_jobs.append(
                ClangInvocation(
                    [str(path)] + DEFAULT_COPT + copt,
                    system_includes=system_includes,
                    timeout=timeout,
                )
            )
        else:
            raise ValueError(f"Unrecognized file type: {path.name}")

    # Determine from inputs the list of pre-compiled bitcodes and the clang
    # invocations required to compile the bitcodes.
    if isinstance(inputs, (str, Path)):
        _add_path(Path(inputs))
    elif isinstance(inputs, ClangInvocation):
        clang_jobs.append(inputs)
    else:
        # A non-iterable value raises TypeError here, matching the documented
        # contract for unsupported input types.
        for item in inputs:
            if isinstance(item, (str, Path)):
                _add_path(Path(item))
            elif isinstance(item, ClangInvocation):
                clang_jobs.append(item)
            else:
                raise TypeError(f"Invalid input type: {type(item).__name__}")

    if not bitcodes and not clang_jobs:
        raise ValueError("No inputs")

    # Shortcut if we only have a single pre-compiled bitcode.
    if len(bitcodes) == 1 and not clang_jobs:
        bitcode = bitcodes[0]
        return Benchmark(
            uri=f"file:///{bitcode}", program=File(uri=f"file:///{bitcode}")
        )

    with tempfile.TemporaryDirectory(dir=cache_path(".")) as d:
        working_dir = Path(d)

        clang_outs = [
            working_dir / f"out-{i}.bc" for i in range(1, len(clang_jobs) + 1)
        ]
        # Each entry is a (command, timeout) pair consumed by _run_command.
        clang_cmds = [
            (job.command(out), job.timeout) for job, out in zip(clang_jobs, clang_outs)
        ]

        # Run the clang invocations in parallel. Skip spawning worker
        # processes when there is nothing to compile (bitcode-only inputs).
        if clang_cmds:
            with multiprocessing.Pool() as pool:
                list(pool.imap_unordered(_run_command, clang_cmds))

        # Check that the expected files were generated.
        for (command, _), out in zip(clang_cmds, clang_outs):
            if not out.is_file():
                # Join only the command part of the pair. Joining the whole
                # (command, timeout) tuple would raise TypeError and mask the
                # real failure.
                raise OSError(
                    f"Clang invocation failed to produce a file: {' '.join(command)}"
                )

        all_bitcodes = bitcodes + clang_outs
        if len(all_bitcodes) > 1:
            # Link all of the bitcodes into a single module.
            llvm_link_cmd = [str(LLVM_LINK), "-o", "-"] + [
                str(path) for path in all_bitcodes
            ]
            llvm_link = subprocess.Popen(
                llvm_link_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            bitcode, stderr = _communicate(llvm_link, timeout=timeout)
            if llvm_link.returncode:
                raise OSError(
                    f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
                )
        else:
            # We only have a single bitcode so read it.
            bitcode = all_bitcodes[0].read_bytes()

    # Date and time (year, month, day, "T", hour, minute, second) followed by
    # a random 4-digit hex suffix to reduce the chance of URI collisions
    # between benchmarks created within the same second.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    suffix = f"{random.randrange(16**4):04x}"
    return Benchmark(
        uri=f"benchmark://user/{timestamp}-{suffix}", program=File(contents=bitcode)
    )