def test_eager_reward(env: CompilerEnv):
    env.reward_space = "codesize"
    env.reset()
    observation, reward, done, info = env.step(0)
    assert observation is None
    assert reward == 0
    assert not done
def test_service_env_dies_reset(env: CompilerEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cBench-v0/crc32")

    # Kill the service.
    env.service.close()

    # Check that the environment doesn't fall over.
    observation, reward, done, info = env.step(0)
    assert done, info["error_details"]
    assert not env.in_episode

    # Check that default values are returned.
    np.testing.assert_array_equal(observation, np.zeros(AUTOPHASE_FEATURE_DIM))
    assert reward == 0

    # Reset the environment and check that it works.
    env.reset(benchmark="cBench-v0/crc32")
    assert env.in_episode

    observation, reward, done, info = env.step(0)
    assert not done, info["error_details"]
    assert observation is not None
    assert reward is not None
def test_service_env_dies_reset(env: CompilerEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")

    # Kill the service. Note killing the service for a ManagedConnection will
    # result in a ServiceError because we have not ended the session we
    # started with env.reset() above. For UnmanagedConnection, this error
    # will not be raised.
    try:
        env.service.close()
    except ServiceError as e:
        assert "Service exited with returncode " in str(e)

    # Check that the environment doesn't fall over.
    observation, reward, done, info = env.step(0)
    assert done, info["error_details"]
    assert not env.in_episode

    # Check that default values are returned.
    np.testing.assert_array_equal(observation, np.zeros(AUTOPHASE_FEATURE_DIM))
    assert reward == 0

    # Reset the environment and check that it works.
    env.reset(benchmark="cbench-v1/crc32")
    assert env.in_episode

    observation, reward, done, info = env.step(0)
    assert not done, info["error_details"]
    assert observation is not None
    assert reward is not None
def test_default_reward(env: CompilerEnv):
    """Test default reward space."""
    env.reward_space = "runtime"
    env.reset()
    observation, reward, done, info = env.step(0)
    assert observation is None
    assert reward == 0
    assert not done
def test_step(env: CompilerEnv, observation_space: str, reward_space: str):
    """Request every combination of observation and reward in a fresh environment."""
    env.reward_space = None
    env.observation_space = None
    env.reset(benchmark="cbench-v1/crc32")

    observation = env.observation[observation_space]
    assert observation is not None

    reward = env.reward[reward_space]
    assert reward is not None
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cBench-v0/crc32")
    observation, reward, done, _ = env.step(env.action_space.from_string(action_name))
    assert isinstance(observation, np.ndarray)
    assert observation.shape == (AUTOPHASE_FEATURE_DIM,)
    assert isinstance(reward, float)
    assert isinstance(done, bool)
def run_one_trial(
    env: CompilerEnv, reward_space: str, min_steps: int, max_steps: int
) -> Optional[float]:
    """Run a random number of random steps in an environment and return the
    cumulative reward.

    :return: A cumulative reward, or None if the episode ended during the
        trial.
    """
    num_steps = random.randint(min_steps, max_steps)
    warmup_actions = [env.action_space.sample() for _ in range(num_steps)]
    env.reward_space = reward_space
    _, _, done, _ = env.multistep(warmup_actions)
    if done:
        return None
    return env.episode_reward
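# A minimal usage sketch for the helper above, not part of the original code:
# it assumes a `make_env()` environment factory and the "IrInstructionCount"
# reward space, both of which are illustrative. It averages the cumulative
# reward of random trajectories, skipping trials whose episode ended early.
def average_random_trajectory_reward(num_trials: int = 10) -> float:
    rewards = []
    with make_env() as env:
        for _ in range(num_trials):
            env.reset()
            reward = run_one_trial(
                env, reward_space="IrInstructionCount", min_steps=1, max_steps=10
            )
            if reward is not None:
                rewards.append(reward)
    return sum(rewards) / max(len(rewards), 1)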
def run_one_trial(
    env: CompilerEnv, reward_space: str, action: int, max_warmup_steps: int
) -> Optional[float]:
    """Run a random number of "warmup" steps in an environment, then compute
    the immediate reward of the given action.

    :return: An immediate reward, or None if the episode ended before the
        reward could be computed.
    """
    num_warmup_steps = random.randint(0, max_warmup_steps)
    warmup_actions = [env.action_space.sample() for _ in range(num_warmup_steps)]
    env.reward_space = reward_space
    _, _, done, _ = env.step(warmup_actions)
    if done:
        return None
    _, (reward,), done, _ = env.step(action, rewards=[reward_space])
    return None if done else reward
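# A hedged sketch of driving the immediate-reward variant above (again
# assuming an illustrative `make_env()` factory): retry with fresh random
# warmups until an episode survives long enough to yield a reward.
def sample_immediate_reward(action: int, max_attempts: int = 5) -> Optional[float]:
    with make_env() as env:
        for _ in range(max_attempts):
            env.reset()
            reward = run_one_trial(
                env,
                reward_space="IrInstructionCount",
                action=action,
                max_warmup_steps=10,
            )
            if reward is not None:
                return reward
    return None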
def replay_actions_from_logs(env: CompilerEnv, logdir: Path, benchmark=None) -> None:
    best_actions_path = logdir / logs.BEST_ACTIONS_NAME
    meta_path = logdir / logs.METADATA_NAME

    assert best_actions_path.is_file(), f"File not found: {best_actions_path}"
    assert meta_path.is_file(), f"File not found: {meta_path}"

    with open(meta_path, "rb") as f:
        meta = json.load(f)

    with open(best_actions_path) as f:
        actions = [ln.strip() for ln in f.readlines() if ln.strip()]

    benchmark = benchmark or meta["benchmark"]
    env.reward_space = meta["reward"]
    env.reset(benchmark=benchmark)
    replay_actions(env, actions, logdir)
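# A hedged example of invoking the replay helper above. It assumes a
# CompilerGym LLVM environment and a log directory previously written with
# the same logs.BEST_ACTIONS_NAME / logs.METADATA_NAME layout; the path is
# illustrative.
from pathlib import Path

import compiler_gym

with compiler_gym.make("llvm-v0") as env:
    replay_actions_from_logs(env, Path("~/logs/compiler_gym/my_run").expanduser())
    print(env.commandline())  # The optimization pipeline the actions produced.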
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cbench-v1/crc32")
    observation, reward, done, _ = env.step(env.action_space.from_string(action_name))
    assert isinstance(observation, np.ndarray)
    assert observation.shape == (AUTOPHASE_FEATURE_DIM,)
    assert isinstance(reward, float)
    assert isinstance(done, bool)
    try:
        env.close()
    except ServiceError as e:
        # env.close() will raise an error if the service terminated
        # ungracefully. In that case, the "done" flag should have been set.
        assert done, f"Service error was raised when 'done' flag not set: {e}"
def test_service_env_dies_reset(env: CompilerEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cBench-v0/crc32")

    # Kill the service.
    env.service.close()

    # Check that the environment doesn't fall over.
    observation, reward, done, _ = env.step(0)
    assert done
    assert observation is None
    assert reward is None

    # Reset the environment and check that it works.
    env.reset(benchmark="cBench-v0/crc32")
    observation, reward, done, _ = env.step(0)
    assert not done
    assert observation is not None
    assert reward is not None
def test_invalid_reward_space(env: CompilerEnv):
    """Test error handling with invalid reward space."""
    with pytest.raises(LookupError):
        env.reward_space = 100
def from_agent(
    cls, env: CompilerEnv, agent, runtime: bool = True, runtimes_count: int = 30
):
    # We calculate our own reward at the end, no need for incremental
    # rewards during inference.
    env.reward_space = None

    # Run inference on the environment.
    observation, done = env.reset(), False
    with Timer() as inference_timer:
        while not done:
            action = agent.compute_action(observation)
            observation, _, done, _ = env.step(action)

    instruction_count_init = env.unwrapped.observation["IrInstructionCountO0"]
    instruction_count_final = env.unwrapped.observation["IrInstructionCount"]
    instruction_count_oz = env.unwrapped.observation["IrInstructionCountOz"]
    object_size_init = env.unwrapped.observation["ObjectTextSizeO0"]
    object_size_final = env.unwrapped.observation["ObjectTextSizeBytes"]
    object_size_oz = env.unwrapped.observation["ObjectTextSizeOz"]

    runtimes_init = []
    runtimes_o3 = []
    runtimes_final = []

    try:
        if runtime and env.unwrapped.observation["IsRunnable"]:
            env.send_param(
                "llvm.set_runtimes_per_observation_count", str(runtimes_count)
            )
            env.unwrapped.observation["Runtime"]  # warmup
            runtimes_final = env.unwrapped.observation["Runtime"].tolist()
            assert (
                len(runtimes_final) == runtimes_count
            ), f"{len(runtimes_final)} != {runtimes_count}"

            env.reset()
            env.send_param(
                "llvm.set_runtimes_per_observation_count", str(runtimes_count)
            )
            env.unwrapped.observation["Runtime"]  # warmup
            runtimes_init = env.unwrapped.observation["Runtime"].tolist()
            assert (
                len(runtimes_init) == runtimes_count
            ), f"{len(runtimes_init)} != {runtimes_count}"

            env.send_param("llvm.apply_baseline_optimizations", "-O3")
            env.unwrapped.observation["Runtime"]  # warmup
            runtimes_o3 = env.unwrapped.observation["Runtime"].tolist()
            assert (
                len(runtimes_o3) == runtimes_count
            ), f"{len(runtimes_o3)} != {runtimes_count}"
    except Exception as e:  # pylint: disable=broad-except
        logger.warning("Failed to compute runtime: %s", e)

    return cls(
        benchmark=env.benchmark.uri,
        inference_walltime_seconds=inference_timer.time,
        commandline=env.commandline(),
        episode_len=len(env.actions),
        instruction_count_init=instruction_count_init,
        instruction_count_final=instruction_count_final,
        instruction_count_oz=instruction_count_oz,
        instruction_count_reduction=instruction_count_oz
        / max(instruction_count_final, 1),
        object_size_init=object_size_init,
        object_size_final=object_size_final,
        object_size_oz=object_size_oz,
        object_size_reduction=object_size_oz / max(object_size_final, 1),
        runtimes_init=runtimes_init,
        runtimes_final=runtimes_final,
        runtimes_o3=runtimes_o3,
        runtime_reduction=np.median(runtimes_o3 or [0])
        / max(np.median(runtimes_final or [0]), 1),
    )
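# A sketch of calling the classmethod above, assuming the enclosing class is
# an InferenceResult-style dataclass (the class name is hypothetical here)
# and that `agent` exposes the compute_action() method used during inference,
# e.g. a trained RLlib agent. The environment id and benchmark are
# illustrative.
import compiler_gym

with compiler_gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/crc32") as env:
    result = InferenceResult.from_agent(env, agent, runtime=False)
    print(result.instruction_count_reduction)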
def test_invalid_reward_space(env: CompilerEnv):
    with pytest.raises(LookupError):
        env.reward_space = 100