def compile_all(self):
    """Compile every program in the dataset, printing progress as it goes.

    The dataset is compiled lazily, so requesting the benchmark for each URI
    once is enough to compile everything. The requests are submitted to the
    executor returned by ``thread_pool.get_thread_pool_executor()`` so that
    they run in parallel.
    """
    total = self.size
    pool = thread_pool.get_thread_pool_executor()
    pending = [pool.submit(self.benchmark, uri) for uri in self.benchmark_uris()]
    for done, job in enumerate(as_completed(pending), start=1):
        # Re-raise here any exception that occurred in the worker.
        job.result()
        # "\r\033[K" returns to the start of the line and clears it, so that
        # the progress message is redrawn in place.
        progress = (
            f"\r\033[KCompiled {done} of {total} programs ({done/total:.1%} complete)"
        )
        print(progress, flush=True, end="")
def ivalidate(self, env: "CompilerEnv") -> Iterable[ValidationError]:  # noqa: F821
    """Run the validation callbacks and lazily yield the errors they report.

    This is the asynchronous counterpart of :meth:`validate()
    <compiler_gym.datasets.Benchmark.validate>`: the call itself returns
    immediately, and the callbacks are only dispatched once the returned
    generator is iterated. Errors are yielded in the order in which the
    callbacks complete.

    :param env: A :class:`CompilerEnv <compiler_gym.envs.CompilerEnv>`
        instance to validate.

    :return: A generator of :class:`ValidationError
        <compiler_gym.ValidationError>` tuples that occur during validation.
    """
    pool = thread_pool.get_thread_pool_executor()
    pending = [pool.submit(callback, env) for callback in self.validation_callbacks()]
    for job in as_completed(pending):
        errors: Iterable[ValidationError] = job.result()
        # A callback that found nothing wrong returns a falsy result.
        if not errors:
            continue
        yield from errors
def validate_states(
    make_env: Callable[[], CompilerEnv],
    states: Iterable[CompilerEnvState],
    nproc: Optional[int] = None,
    inorder: bool = False,
) -> Iterable[ValidationResult]:
    """A parallelized implementation of
    :meth:`env.validate() <compiler_gym.envs.CompilerEnv.validate>` for batched
    validation.

    :param make_env: A callback which instantiates a compiler environment.

    :param states: An iterable of compiler environment states to validate. It
        is fully consumed before validation starts.

    :param nproc: If :code:`1`, states are validated serially in the calling
        thread. Any other value dispatches the work to the executor returned
        by :code:`thread_pool.get_thread_pool_executor()`.

    :param inorder: Whether to return results in the order they were provided,
        or in the order that they are available.

    :return: An iterator over validation results. Unless :code:`inorder` is
        set or :code:`nproc` is 1, the order of results may differ from the
        input states.
    """
    # The input is only required to be iterable (it may be a generator), but
    # its length is needed below, so materialize it once up front. This also
    # gives us a private copy that is safe to shuffle.
    states = list(states)

    if nproc == 1:
        # Serial execution; no need to touch the thread pool at all.
        map_func = map
    else:
        executor = thread_pool.get_thread_pool_executor()
        if inorder:
            map_func = executor.map
        else:
            # The validation function of benchmarks can vary wildly in
            # computational demands. Shuffle the order of states (unless
            # explicitly asked for them to be kept inorder) as crude load
            # balancing for the case where multiple states are provided for
            # each benchmark.
            random.shuffle(states)

            def map_func(func, envs, states):
                futures = [
                    executor.submit(func, env, state)
                    for env, state in zip(envs, states)
                ]
                return (future.result() for future in as_completed(futures))

    yield from map_func(_validate_states_worker, [make_env] * len(states), states)
def make_benchmark(
    inputs: Union[str, Path, ClangInvocation, List[Union[str, Path, ClangInvocation]]],
    copt: Optional[List[str]] = None,
    system_includes: bool = True,
    timeout: int = 600,
) -> Benchmark:
    """Create a benchmark for use by LLVM environments.

    This function takes one or more inputs and uses them to create an LLVM
    bitcode benchmark that can be passed to
    :meth:`compiler_gym.envs.LlvmEnv.reset`.

    The following input types are supported:

    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | **File Suffix**                                     | **Treated as**      | **Converted using**                                         |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.bc`                                         | LLVM IR bitcode     | No conversion required.                                     |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.ll`                                         | LLVM IR text format | Assembled to bitcode using llvm-as.                         |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.c`, :code:`.cc`, :code:`.cpp`, :code:`.cxx` | C / C++ source      | Compiled to bitcode using clang and the given :code:`copt`. |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+

    .. note::

        The LLVM IR format has no compatibility guarantees between versions
        (see `LLVM docs
        <https://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility>`_).
        You must ensure that any :code:`.bc` and :code:`.ll` files are
        compatible with the LLVM version used by CompilerGym, which can be
        reported using :func:`env.compiler_version
        <compiler_gym.envs.CompilerEnv.compiler_version>`.

    E.g. for single-source C/C++ programs, you can pass the path of the source
    file:

        >>> benchmark = make_benchmark('my_app.c')
        >>> env = gym.make("llvm-v0")
        >>> env.reset(benchmark=benchmark)

    The clang invocation used is roughly equivalent to:

    .. code-block::

        $ clang my_app.c -O0 -c -emit-llvm -o benchmark.bc

    Additional compile-time arguments to clang can be provided using the
    :code:`copt` argument:

        >>> benchmark = make_benchmark('/path/to/my_app.cpp', copt=['-O2'])

    If you need more fine-grained control over the options, you can directly
    construct a :class:`ClangInvocation
    <compiler_gym.envs.llvm.ClangInvocation>` to pass a list of arguments to
    clang:

        >>> benchmark = make_benchmark(
            ClangInvocation(['/path/to/my_app.c'], system_includes=False, timeout=10)
        )

    For multi-file programs, pass a list of inputs that will be compiled
    separately and then linked to a single module:

        >>> benchmark = make_benchmark([
            'main.c',
            'lib.cpp',
            'lib2.bc',
            'foo/input.bc'
        ])

    :param inputs: An input, or list of inputs.

    :param copt: A list of command line options to pass to clang when
        compiling source files. Only applied to inputs given as paths, not to
        :code:`ClangInvocation` inputs.

    :param system_includes: Whether to include the system standard libraries
        during compilation jobs. This requires a system toolchain. See
        :func:`get_system_library_flags`. Only applied to inputs given as
        paths.

    :param timeout: The maximum number of seconds to allow clang to run before
        terminating. The same limit is applied to each llvm-as job and to the
        final llvm-link step. :code:`ClangInvocation` inputs use their own
        timeout.

    :return: A :code:`Benchmark` instance.

    :raises FileNotFoundError: If any input sources are not found.

    :raises TypeError: If the inputs are of unsupported types.

    :raises ValueError: If an input file has an unrecognized suffix, or if no
        inputs are provided.

    :raises OSError: If a suitable compiler cannot be found.

    :raises BenchmarkInitError: If a compilation job fails.

    :raises TimeoutExpired: If a compilation job exceeds :code:`timeout`
        seconds.
    """
    copt = copt or []

    bitcodes: List[Path] = []
    clang_jobs: List[ClangInvocation] = []
    ll_paths: List[Path] = []

    def _add_path(path: Path):
        """Sort a file input into the right work list based on its suffix."""
        if not path.is_file():
            raise FileNotFoundError(path)

        if path.suffix == ".bc":
            bitcodes.append(path.absolute())
        elif path.suffix in {".c", ".cc", ".cpp", ".cxx"}:
            clang_jobs.append(
                ClangInvocation.from_c_file(
                    path, copt=copt, system_includes=system_includes, timeout=timeout
                )
            )
        elif path.suffix == ".ll":
            ll_paths.append(path)
        else:
            raise ValueError(f"Unrecognized file type: {path.name}")

    # Determine from inputs the list of pre-compiled bitcodes and the clang
    # invocations required to compile the bitcodes. A lone input is handled
    # as a one-element list.
    if isinstance(inputs, (str, Path, ClangInvocation)):
        inputs = [inputs]
    for item in inputs:
        if isinstance(item, (str, Path)):
            _add_path(Path(item))
        elif isinstance(item, ClangInvocation):
            clang_jobs.append(item)
        else:
            raise TypeError(f"Invalid input type: {type(item).__name__}")

    # Fail fast, before creating any temporary directory or touching the
    # thread pool.
    if not (bitcodes or clang_jobs or ll_paths):
        raise ValueError("No inputs")

    # Shortcut if we only have a single pre-compiled bitcode.
    if len(bitcodes) == 1 and not clang_jobs and not ll_paths:
        bitcode = bitcodes[0]
        return Benchmark.from_file(uri=f"benchmark://file-v0{bitcode}", path=bitcode)

    tmpdir_root = transient_cache_path(".")
    tmpdir_root.mkdir(exist_ok=True, parents=True)
    with tempfile.TemporaryDirectory(
        dir=tmpdir_root, prefix="llvm-make_benchmark-"
    ) as d:
        working_dir = Path(d)

        clang_outs = [
            working_dir / f"clang-out-{i}.bc" for i in range(1, len(clang_jobs) + 1)
        ]
        llvm_as_outs = [
            working_dir / f"llvm-as-out-{i}.bc" for i in range(1, len(ll_paths) + 1)
        ]

        # Run the clang and llvm-as invocations in parallel. Avoid running this
        # code path if possible as get_thread_pool_executor() requires locking.
        if clang_jobs or ll_paths:
            llvm_as_path = str(llvm.llvm_as_path())
            executor = get_thread_pool_executor()

            # Keep every argv element a plain string so that the command can
            # be joined or logged without surprises.
            llvm_as_commands = [
                [llvm_as_path, str(ll_path), "-o", str(bc_path)]
                for ll_path, bc_path in zip(ll_paths, llvm_as_outs)
            ]

            # Fire off the clang and llvm-as jobs.
            futures = [
                executor.submit(run_command, job.command(out), job.timeout)
                for job, out in zip(clang_jobs, clang_outs)
            ] + [
                executor.submit(run_command, command, timeout)
                for command in llvm_as_commands
            ]

            # Block until finished. Calling result() re-raises any exception
            # that occurred in a worker.
            for future in as_completed(futures):
                future.result()

            # Check that the expected files were generated.
            for clang_job, bc_path in zip(clang_jobs, clang_outs):
                if not bc_path.is_file():
                    raise BenchmarkInitError(
                        f"clang failed: {' '.join(clang_job.command(bc_path))}"
                    )
            for command, bc_path in zip(llvm_as_commands, llvm_as_outs):
                if not bc_path.is_file():
                    raise BenchmarkInitError(f"llvm-as failed: {command}")

        # Non-empty: the "No inputs" case was rejected above.
        all_outs = bitcodes + clang_outs + llvm_as_outs
        if len(all_outs) == 1:
            # We only have a single bitcode so read it.
            bitcode = all_outs[0].read_bytes()
        else:
            # Link all of the bitcodes into a single module. This must include
            # the llvm-as outputs, otherwise .ll inputs would be silently
            # dropped from the benchmark.
            llvm_link_cmd = [str(llvm.llvm_link_path()), "-o", "-"] + [
                str(path) for path in all_outs
            ]
            with Popen(
                llvm_link_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            ) as llvm_link:
                try:
                    bitcode, stderr = llvm_link.communicate(timeout=timeout)
                except subprocess.TimeoutExpired:
                    # communicate() does not kill the child on timeout, and
                    # leaving the `with` block waits for it to exit, so kill
                    # it before propagating the error.
                    llvm_link.kill()
                    raise
                if llvm_link.returncode:
                    raise BenchmarkInitError(
                        f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
                    )

    # Date and time of creation plus four random hex digits, to make the URI
    # of each user benchmark distinct.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    uri = f"benchmark://user-v0/{timestamp}-{random.randrange(16**4):04x}"
    return Benchmark.from_file_contents(uri, bitcode)