Example #1
    def __call__(self, env: CompilerEnv, seed: int = 0xCC) -> CompilerEnvState:
        """Autotune the given environment.

        :param env: The environment to autotune.

        :param seed: The random seed for the autotuner.

        :returns: A CompilerEnvState tuple describing the autotuning result.
        """
        # Run the autotuner in a temporary working directory and capture the
        # stdout/stderr.
        with tempfile.TemporaryDirectory(dir=transient_cache_path("."),
                                         prefix="autotune-") as tmpdir:
            with temporary_working_directory(Path(tmpdir)):
                with capture_output():
                    with Timer() as timer:
                        self.autotune(env, seed=seed, **self.autotune_kwargs)

        return CompilerEnvState(
            benchmark=env.benchmark.uri,
            commandline=env.commandline(),
            walltime=timer.time,
            reward=self.optimization_target.final_reward(env),
        )
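
A minimal sketch of how a callable like the one above might be driven end to end. The autotuner object is passed in as a parameter here because its class is not shown in the excerpt; only compiler_gym.make() and the CompilerEnvState fields printed below follow the API used above, and the benchmark URI is an assumption that may need adjusting for your CompilerGym version.

import compiler_gym


def run_autotuner_example(autotuner) -> None:
    # `autotuner` is assumed to be an instance of the (unshown) class that
    # defines the __call__ method above.
    with compiler_gym.make("llvm-v0", benchmark="cbench-v1/crc32") as env:
        state = autotuner(env, seed=0xCC)  # returns a CompilerEnvState
        print("reward:", state.reward)
        print("walltime:", state.walltime)
        print("commandline:", state.commandline)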
Example #2
def run_manual_env(env: CompilerEnv):
    """Run an environment manually.

    The manual environment allows the user to step through the environment,
    selecting the observations, rewards, and actions to run as they see fit.
    This is useful for debugging.

    :param env: The environment to run.
    """
    benchmark = None
    if not env.benchmark:
        # Allow the user to choose a benchmark, with the first choice being
        # to select randomly.
        benchmarks = sorted(env.benchmarks)
        if not benchmarks:
            print(
                "No benchmarks available. See https://facebookresearch.github.io/CompilerGym/getting_started.html#installing-benchmarks"
            )
            print("Exiting...")
            env.close()
            return

        # Strip default benchmark:// protocol.
        for i, benchmark in enumerate(benchmarks):
            if benchmark.startswith("benchmark://"):
                benchmarks[i] = benchmark[len("benchmark://"):]

        benchmark_index = user_input.read_list_index("Benchmark",
                                                     ["random"] + benchmarks)
        if benchmark_index:
            benchmark = benchmarks[benchmark_index - 1]
        else:
            benchmark = None

    with Timer() as timer:
        eager_observation = env.reset(benchmark=benchmark)

    print(f"Reset {env.benchmark} environment in {timer}")
    if env.observation_space and eager_observation is not None:
        print(
            f"Observation: {env.observation_space.to_string(eager_observation)}"
        )

    observation_names = sorted(env.observation.spaces.keys())
    reward_names = sorted(env.reward.spaces.keys())
    last_eager_reward: Optional[float] = None
    step_count = 1

    while True:
        print(
            f"\nStep {step_count}. Select: [{emph('a')}]ction "
            f"[{emph('o')}]bservation [{emph('r')}]eward "
            f"[{emph('c')}]ommandline [{emph('e')}]nd >>> ",
            end="",
            flush=True,
        )
        while True:
            c = user_input.read_char()
            if c == "a":
                print("action", flush=True)
                index = user_input.read_list_index("Actions", ["random"] +
                                                   env.action_space.names)
                step_count += 1
                with Timer() as t:
                    if index == 0:
                        # User selected "random" action.
                        index = env.action_space.sample()
                    else:
                        # Offset to remove "random" action from index.
                        index -= 1
                    eager_observation, eager_reward, done, info = env.step(
                        index)

                # Print the eager observation, if available.
                if env.observation_space and eager_observation is not None:
                    print(
                        f"Observation: {env.observation_space.to_string(eager_observation)}"
                    )

                # Print the eager reward and the diff, if available.
                if env.reward_space and eager_reward is not None:
                    reward_diff = ""
                    if last_eager_reward is not None and eager_reward is not None:
                        reward_diff = (
                            f" (change: {eager_reward - last_eager_reward:.6f})"
                        )
                    print(f"Reward: {eager_reward:.6f}{reward_diff}")
                    last_eager_reward = eager_reward

                print(
                    f"Action {env.action_space.names[index]} in {t}.",
                    " No effect." if info.get("action_had_no_effect") else "",
                    flush=True,
                )
                if done:
                    print("Episode ended by environment: ",
                          info["error_details"])
                    env.close()
                    return
                break
            if c == "o":
                print("observation", flush=True)
                observation_name = user_input.read_list_value(
                    "Observable values", observation_names)
                with Timer() as timer:
                    value = env.observation[observation_name]
                print(
                    env.observation.spaces[observation_name].to_string(value))
                print(f"Observation {observation_name} in {timer}")
                break
            elif c == "r":
                print("reward", flush=True)
                reward_name = user_input.read_list_value(
                    "Rewards", reward_names)
                with Timer(f"Reward {reward_name}"):
                    print(f"{env.reward[reward_name]:.6f}")
                break
            elif c == "c":
                print("commandline")
                print("$", env.commandline(), flush=True)
                break
            elif c == "e":
                print("end", flush=True)
                with Timer("Closed environment"):
                    env.close()
                print("Have a nice day!")
                return
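
For comparison, here is a short non-interactive sketch (not from the source) of the same reset/step loop, taking random actions instead of prompting the user. It relies only on the CompilerEnv calls already used above: reset(), step(), action_space.sample(), and commandline(). The episode length is an arbitrary choice.

def run_random_episode(env: CompilerEnv, num_steps: int = 10) -> None:
    """Non-interactive counterpart to run_manual_env(): take random actions."""
    env.reset()
    for _ in range(num_steps):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if done:
            # The environment may end the episode early, e.g. on error.
            break
    # Print the pass sequence selected so far, as run_manual_env() does for
    # the 'c' command.
    print("$", env.commandline())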
Example #3
    @classmethod
    def from_agent(cls,
                   env: CompilerEnv,
                   agent,
                   runtime: bool = True,
                   runtimes_count: int = 30):
        """Run inference with the given agent on an environment and build a
        result describing the final state."""
        # We calculate our own reward at the end, no need for incremental
        # rewards during inference.
        env.reward_space = None

        # Run inference on the environment.
        observation, done = env.reset(), False
        with Timer() as inference_timer:
            while not done:
                action = agent.compute_action(observation)
                observation, _, done, _ = env.step(action)

        instruction_count_init = env.unwrapped.observation[
            "IrInstructionCountO0"]
        instruction_count_final = env.unwrapped.observation[
            "IrInstructionCount"]
        instruction_count_oz = env.unwrapped.observation[
            "IrInstructionCountOz"]

        object_size_init = env.unwrapped.observation["ObjectTextSizeO0"]
        object_size_final = env.unwrapped.observation["ObjectTextSizeBytes"]
        object_size_oz = env.unwrapped.observation["ObjectTextSizeOz"]

        runtimes_init = []
        runtimes_o3 = []
        runtimes_final = []

        try:
            if runtime and env.unwrapped.observation["IsRunnable"]:
                env.send_param("llvm.set_runtimes_per_observation_count",
                               str(runtimes_count))
                env.unwrapped.observation["Runtime"]  # warmup
                runtimes_final = env.unwrapped.observation["Runtime"].tolist()
                assert (len(runtimes_final) == runtimes_count
                        ), f"{len(runtimes_final)} != {runtimes_count}"

                env.reset()
                env.send_param("llvm.set_runtimes_per_observation_count",
                               str(runtimes_count))
                env.unwrapped.observation["Runtime"]  # warmup
                runtimes_init = env.unwrapped.observation["Runtime"].tolist()
                assert (len(runtimes_init) == runtimes_count
                        ), f"{len(runtimes_init)} != {runtimes_count}"

                env.send_param("llvm.apply_baseline_optimizations", "-O3")
                env.unwrapped.observation["Runtime"]  # warmup
                runtimes_o3 = env.unwrapped.observation["Runtime"].tolist()
                assert (len(runtimes_o3) == runtimes_count
                        ), f"{len(runtimes_o3)} != {runtimes_count}"
        except Exception as e:  # pylint: disable=broad-except
            logger.warning("Failed to compute runtime: %s", e)

        return cls(
            benchmark=env.benchmark.uri,
            inference_walltime_seconds=inference_timer.time,
            commandline=env.commandline(),
            episode_len=len(env.actions),
            instruction_count_init=instruction_count_init,
            instruction_count_final=instruction_count_final,
            instruction_count_oz=instruction_count_oz,
            instruction_count_reduction=instruction_count_oz /
            max(instruction_count_final, 1),
            object_size_init=object_size_init,
            object_size_final=object_size_final,
            object_size_oz=object_size_oz,
            object_size_reduction=object_size_oz / max(object_size_final, 1),
            runtimes_init=runtimes_init,
            runtimes_final=runtimes_final,
            runtimes_o3=runtimes_o3,
            runtime_reduction=np.median(runtimes_o3 or [0]) /
            max(np.median(runtimes_final or [0]), 1),
        )
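
A sketch of how from_agent() might be invoked once an agent has been trained. The result class and the agent are passed in as parameters because their concrete types are not shown in the excerpt; the agent is only assumed to expose the compute_action(observation) method used above (as RLlib policies do), and the benchmark URI is an assumption.

import compiler_gym


def summarize_agent(result_cls, agent) -> None:
    # result_cls: the class that defines from_agent() above (its name is not
    # shown in the excerpt); agent: any object with compute_action().
    with compiler_gym.make("llvm-v0", benchmark="cbench-v1/crc32") as env:
        result = result_cls.from_agent(env, agent, runtime=False)
        print("commandline:", result.commandline)
        print("instruction count reduction:",
              result.instruction_count_reduction)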
Example #4
def test_commandline(env: CompilerEnv):
    env.reset(benchmark="cBench-v0/crc32")
    assert env.commandline() == "opt  input.bc -o output.bc"
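
A hedged follow-on test sketch (not from the source): after applying a pass, its name should show up in the commandline. It assumes the LLVM action space exposes pass names via action_space.names, as used in the example above, and that "-mem2reg" is among them.

def test_commandline_after_step(env: CompilerEnv):
    env.reset(benchmark="cBench-v0/crc32")
    # Assumption: "-mem2reg" is a valid pass name in this action space.
    index = env.action_space.names.index("-mem2reg")
    env.step(index)
    assert "-mem2reg" in env.commandline()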