def test_add_benchmark_invalid_protocol(env: CompilerEnv):
    """Check that resetting with an https:// program URI raises ValueError."""
    expected_message = (
        'Unsupported benchmark URI protocol: "https://invalid/protocol"'
    )

    with pytest.raises(ValueError) as ctx:
        # The benchmark is built inside the raises-block, as in the original.
        program = File(uri="https://invalid/protocol")
        env.reset(benchmark=Benchmark(uri="benchmark://foo", program=program))

    assert str(ctx.value) == expected_message
def test_add_benchmark_invalid_path(env: CompilerEnv):
    """Check that a file:/// URI naming a missing file raises FileNotFoundError."""
    with tempfile.TemporaryDirectory() as scratch:
        # A path inside a fresh temporary directory that is never created.
        missing = Path(scratch) / "not_a_file"

        with pytest.raises(FileNotFoundError) as ctx:
            program = File(uri=f"file:///{missing}")
            env.reset(benchmark=Benchmark(uri="benchmark://foo", program=program))

        assert str(ctx.value) == f'File not found: "{missing}"'
def make_benchmark_of_size(size_in_bytes: int, target: int = 0) -> Benchmark:
    """Test helper. Generate a benchmark of the given size in bytes.

    A payload of ``target`` "." characters is wrapped in a benchmark. If the
    resulting message's ``ByteSize()`` differs from ``size_in_bytes``, the
    helper calls itself again with the payload length corrected by that
    difference.

    :param size_in_bytes: The desired ``ByteSize()`` of the returned benchmark.
    :param target: The payload length to try. A falsy value (the default)
        means "start with ``size_in_bytes``".
    :return: A benchmark whose ``ByteSize()`` equals ``size_in_bytes``.
    """
    if not target:
        target = size_in_bytes

    payload = ("." * target).encode("utf-8")
    candidate = Benchmark(program=File(contents=payload))

    overshoot = candidate.ByteSize() - size_in_bytes
    if overshoot == 0:
        return candidate

    # Retry with the payload adjusted by the measured size difference.
    return make_benchmark_of_size(size_in_bytes, size_in_bytes - overshoot)
def test_add_benchmark_invalid_protocol(env: CompilerEnv):
    """Check that a proto-backed benchmark with an https:// URI raises ValueError."""
    expected_message = (
        "Invalid benchmark data URI. "
        'Only the file:/// protocol is supported: "https://invalid/protocol"'
    )

    with pytest.raises(ValueError) as ctx:
        # The benchmark is built inside the raises-block, as in the original.
        proto = BenchmarkProto(
            uri="benchmark://foo",
            program=File(uri="https://invalid/protocol"),
        )
        env.reset(benchmark=Benchmark(proto))

    assert str(ctx.value) == expected_message
def test_benchmark_path_invalid_protocol(env: CompilerEnv):
    """Check that an unknown URI scheme on the program raises ValueError."""
    program = File(uri="invalid_protocol://test")
    benchmark = Benchmark(uri="benchmark://new", program=program)
    expected_message = 'Unsupported benchmark URI protocol: "invalid_protocol://test"'

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == expected_message
def test_invalid_benchmark_data(env: CompilerEnv):
    """Check that inline program contents that are not bitcode raise ValueError."""
    garbage = "Invalid bitcode".encode("utf-8")
    benchmark = Benchmark(uri="benchmark://new", program=File(contents=garbage))
    expected_message = 'Failed to parse LLVM bitcode: "benchmark://new"'

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == expected_message
def benchmark_from_flags() -> Optional[Union[Benchmark, str]]:
    """Returns either the name of the benchmark, or a Benchmark message.

    ``FLAGS.benchmark`` takes precedence and is returned as-is. Otherwise, if
    ``FLAGS.program_data`` is set, a Benchmark message is built that uses that
    value as both its URI and its program URI.

    :return: The benchmark flag value, a Benchmark message, or ``None`` if
        neither flag is set.
    """
    if FLAGS.benchmark:
        return FLAGS.benchmark

    if not FLAGS.program_data:
        # No benchmark was specified.
        return None

    return Benchmark(
        uri=FLAGS.program_data,
        program=File(uri=FLAGS.program_data),
    )
def from_file_contents(cls, uri: str, data: bytes):
    """Construct a benchmark from raw data.

    :param uri: The URI of the benchmark.

    :param data: An array of bytes that will be passed to the compiler
        service.

    :return: An instance of ``cls`` wrapping a proto that embeds ``data`` as
        the program contents.
    """
    program = File(contents=data)
    proto = BenchmarkProto(uri=uri, program=program)
    return cls(proto=proto)
def test_benchmark_path_not_found(env: CompilerEnv):
    """Check that a file:/// URI naming a missing file raises FileNotFoundError."""
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        # Nothing is created at this location, so the lookup must fail.
        program = File(uri=f"file:///{root}/not_found")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(FileNotFoundError) as ctx:
            env.reset(benchmark=benchmark)

        assert str(ctx.value) == f'File not found: "{root}/not_found"'
def test_benchmark_path_empty_file(env: CompilerEnv):
    """Check that a file:/// URI naming a zero-byte file raises ValueError."""
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        empty_file = root / "test.bc"
        empty_file.touch()

        program = File(uri=f"file:///{root}/test.bc")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(ValueError) as ctx:
            env.reset(benchmark=benchmark)

        assert str(ctx.value) == f'File is empty: "{root}/test.bc"'
def test_benchmark_path_invalid_protocol(env: LlvmEnv):
    """Check that an unknown URI scheme on a proto-backed benchmark raises ValueError."""
    proto = BenchmarkProto(
        uri="benchmark://new",
        program=File(uri="invalid_protocol://test"),
    )
    benchmark = Benchmark(proto)

    # Passed to pytest.raises(match=...), which treats it as a regex.
    expected_pattern = (
        "Invalid benchmark data URI. "
        'Only the file:/// protocol is supported: "invalid_protocol://test"'
    )

    with pytest.raises(ValueError, match=expected_pattern):
        env.reset(benchmark=benchmark)
def test_invalid_benchmark_path_contents(env: CompilerEnv):
    """Check that a file whose contents are not bitcode raises ValueError."""
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        # Write plain text where the environment expects bitcode.
        (root / "test.bc").write_text("Invalid bitcode")

        program = File(uri=f"file:///{root}/test.bc")
        benchmark = Benchmark(uri="benchmark://new", program=program)

        with pytest.raises(ValueError) as ctx:
            env.reset(benchmark=benchmark)

        assert str(ctx.value) == 'Failed to parse LLVM bitcode: "benchmark://new"'
def from_file(cls, uri: str, path: Path):
    """Construct a benchmark from a file.

    :param uri: The URI of the benchmark.

    :param path: A filesystem path.

    :raise FileNotFoundError: If the path does not exist.

    :return: A :class:`Benchmark <compiler_gym.datasets.Benchmark>`
        instance.
    """
    source = Path(path)
    if not source.is_file():
        raise FileNotFoundError(source)

    # The file's bytes are loaded into memory and embedded in the File
    # protocol buffer. Storing only the path in File.uri would be an
    # alternative, but that breaks for distributed services that do not
    # share a filesystem with the caller.
    program = File(contents=source.read_bytes())
    return cls(proto=BenchmarkProto(uri=uri, program=program))
def __init__(self, invocation: GccInvocation, bitcode: bytes, timeout: int):
    """Build a benchmark from a compiler invocation and its bitcode.

    The benchmark URI is derived from the URL-quoted original command line.
    The build command stored in the proto's dynamic config is the original
    command line, rewritten to take the bitcode as its input.

    :param invocation: The parsed compiler invocation. Its ``original_argv``,
        ``sources`` and ``output_path`` attributes are read.
    :param bitcode: The bitcode embedded as the benchmark's program contents.
    :param timeout: Stored as the build command's ``timeout_seconds``.
    """
    quoted_command = urllib.parse.quote_plus(join_cmd(invocation.original_argv))
    proto = BenchmarkProto(
        uri=f"benchmark://clang-v0/{quoted_command}",
        program=File(contents=bitcode),
    )
    super().__init__(proto=proto)
    self.command_line = invocation.original_argv

    # Rewrite the command line so that the bitcode file becomes the input.
    #
    # Inputs fall into two groups: object files, which stay in the command,
    # and the original sources, which are dropped from it.
    original_sources = {s for s in invocation.sources if not s.endswith(".o")}
    object_inputs = {s for s in invocation.sources if s.endswith(".o")}

    # Object file inputs become absolute paths because the backend service
    # will have a different working directory.
    #
    # TODO(github.com/facebookresearch/CompilerGym/issues/325): To support
    # distributed execution, we should embed the contents of these object
    # files in the benchmark proto.
    build_command = [
        os.path.abspath(arg) if arg in object_inputs else arg
        for arg in invocation.original_argv
        if arg not in original_sources
    ]

    # Remove every "-o <path>" pair, scanning from the back so that earlier
    # indices stay valid after each deletion.
    index = len(build_command) - 2
    while index >= 0:
        if build_command[index] == "-o":
            del build_command[index + 1]
            del build_command[index]
        index -= 1

    # Append the new input and the output path rendered from the invocation.
    output_path = str(invocation.output_path)
    build_command.extend(["-xir", "$IN", "-o", output_path])

    build_cmd = self.proto.dynamic_config.build_cmd
    build_cmd.argument[:] = build_command
    build_cmd.outfile[:] = [output_path]
    build_cmd.timeout_seconds = timeout
def test_custom_benchmark(env: LlvmEnv):
    """Check that resetting with a user benchmark exposes its URI on the env."""
    program = File(uri=f"file:///{EXAMPLE_BITCODE_FILE}")
    custom = Benchmark(uri="benchmark://new", program=program)

    env.reset(benchmark=custom)

    assert env.benchmark == "benchmark://new"
def make_benchmark(
    inputs: Union[str, Path, ClangInvocation, List[Union[str, Path, ClangInvocation]]],
    copt: Optional[List[str]] = None,
    system_includes: bool = True,
    timeout: int = 600,
) -> Benchmark:
    """Create a benchmark for use by LLVM environments.

    This function takes one or more inputs and uses them to create a benchmark
    that can be passed to :meth:`compiler_gym.envs.LlvmEnv.reset`.

    For single-source C/C++ programs, you can pass the path of the source file:

        >>> benchmark = make_benchmark('my_app.c')
        >>> env = gym.make("llvm-v0")
        >>> env.reset(benchmark=benchmark)

    The clang invocation used is roughly equivalent to:

    .. code-block::

        $ clang my_app.c -O0 -c -emit-llvm -o benchmark.bc

    Additional compile-time arguments to clang can be provided using the
    :code:`copt` argument:

        >>> benchmark = make_benchmark('/path/to/my_app.cpp', copt=['-O2'])

    If you need more fine-grained control over the options, you can directly
    construct a :class:`ClangInvocation <compiler_gym.envs.llvm.ClangInvocation>`
    to pass a list of arguments to clang:

        >>> benchmark = make_benchmark(
        ...     ClangInvocation(['/path/to/my_app.c'], timeout=10)
        ... )

    For multi-file programs, pass a list of inputs that will be compiled
    separately and then linked to a single module:

        >>> benchmark = make_benchmark([
        ...     'main.c',
        ...     'lib.cpp',
        ...     'lib2.bc',
        ... ])

    If you already have prepared bitcode files, those can be linked and used
    directly:

        >>> benchmark = make_benchmark([
        ...     'bitcode1.bc',
        ...     'bitcode2.bc',
        ... ])

    .. note::

        LLVM bitcode compatibility is
        `not guaranteed <https://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility>`_,
        so you must ensure that any precompiled bitcodes are compatible with the
        LLVM version used by CompilerGym, which can be queried using
        :func:`LlvmEnv.compiler_version <compiler_gym.envs.CompilerEnv.compiler_version>`.

    :param inputs: An input, or list of inputs.

    :param copt: A list of command line options to pass to clang when compiling
        source files.

    :param system_includes: Whether to include the system standard libraries
        during compilation jobs. This requires a system toolchain. See
        :func:`get_system_includes`.

    :param timeout: The maximum number of seconds to allow clang to run before
        terminating.

    :return: A :code:`Benchmark` message.

    :raises FileNotFoundError: If any input sources are not found.

    :raises TypeError: If the inputs are of unsupported types.

    :raises ValueError: If an input path has an unrecognized file suffix, or if
        no inputs are given.

    :raises OSError: If a compilation job fails.

    :raises TimeoutExpired: If a compilation job exceeds :code:`timeout`
        seconds.
    """
    copt = copt or []

    bitcodes: List[Path] = []
    clang_jobs: List[ClangInvocation] = []

    def _add_path(path: Path):
        # NOTE(cummins): There is some discussion about the best way to create
        # a bitcode that is unoptimized yet does not hinder downstream
        # optimization opportunities. Here we are using a configuration based
        # on -O0, yet there is a suggestion that an optimized configuration
        # can produce better results if the optimizations themselves are
        # explicitly disabled, as in: ["-Oz", "-Xclang", "-disable-llvm-optzns"]
        # See: https://lists.llvm.org/pipermail/llvm-dev/2018-August/thread.html#125365
        DEFAULT_COPT = [
            "-O",
            "-Xclang",
            "-disable-O0-optnone",
            "-Xclang",
            "-disable-llvm-passes",
        ]

        if not path.is_file():
            raise FileNotFoundError(path)

        if path.suffix == ".bc":
            bitcodes.append(path)
        elif path.suffix in {".c", ".cxx", ".cpp", ".cc"}:
            clang_jobs.append(
                ClangInvocation(
                    [str(path)] + DEFAULT_COPT + copt,
                    system_includes=system_includes,
                    timeout=timeout,
                )
            )
        else:
            raise ValueError(f"Unrecognized file type: {path.name}")

    # Determine from inputs the list of pre-compiled bitcodes and the clang
    # invocations required to compile the bitcodes.
    if isinstance(inputs, (str, Path)):
        _add_path(Path(inputs))
    elif isinstance(inputs, ClangInvocation):
        clang_jobs.append(inputs)
    else:
        # Named `item` rather than `input` to avoid shadowing the builtin.
        for item in inputs:
            if isinstance(item, (str, Path)):
                _add_path(Path(item))
            elif isinstance(item, ClangInvocation):
                clang_jobs.append(item)
            else:
                raise TypeError(f"Invalid input type: {type(item).__name__}")

    if not bitcodes and not clang_jobs:
        raise ValueError("No inputs")

    # Shortcut if we only have a single pre-compiled bitcode.
    if len(bitcodes) == 1 and not clang_jobs:
        bitcode = bitcodes[0]
        return Benchmark(
            uri=f"file:///{bitcode}", program=File(uri=f"file:///{bitcode}")
        )

    with tempfile.TemporaryDirectory(dir=cache_path(".")) as d:
        working_dir = Path(d)

        # Run the clang invocations in parallel.
        clang_outs = [
            working_dir / f"out-{i}.bc" for i in range(1, len(clang_jobs) + 1)
        ]
        # Each entry is a (command, timeout) pair consumed by _run_command.
        clang_cmds = [
            (job.command(out), job.timeout) for job, out in zip(clang_jobs, clang_outs)
        ]
        with multiprocessing.Pool() as pool:
            list(pool.imap_unordered(_run_command, clang_cmds))

        # Check that the expected files were generated.
        for (command, _), out in zip(clang_cmds, clang_outs):
            if not out.is_file():
                # Join only the command half of the pair; joining the whole
                # (command, timeout) tuple would raise a TypeError and mask
                # the real failure. Assumes the command is a sequence of
                # arguments, as the original join implied.
                failed_command = " ".join(str(arg) for arg in command)
                raise OSError(
                    f"Clang invocation failed to produce a file: {failed_command}"
                )

        all_bitcodes = bitcodes + clang_outs
        if len(all_bitcodes) > 1:
            # Link all of the bitcodes into a single module.
            llvm_link_cmd = [str(LLVM_LINK), "-o", "-"] + [
                str(path) for path in all_bitcodes
            ]
            llvm_link = subprocess.Popen(
                llvm_link_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            bitcode, stderr = _communicate(llvm_link, timeout=timeout)
            if llvm_link.returncode:
                raise OSError(
                    f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
                )
        else:
            # We only have a single bitcode so read it.
            with open(str(all_bitcodes[0]), "rb") as f:
                bitcode = f.read()

    # Timestamp is <year><month><day>T<hour><minute><second>, followed by a
    # random 4-digit hex suffix to separate benchmarks made in the same second.
    timestamp = datetime.now().strftime(
        f"%Y%m%dT%H%M%S-{random.randrange(16**4):04x}"
    )
    return Benchmark(
        uri=f"benchmark://user/{timestamp}", program=File(contents=bitcode)
    )