def eval_action(fkd: CompilerEnv, action: int) -> RewardAction:
    """Evaluate a single action on the given environment.

    Steps the environment once, closes it, and returns the reward that the
    action produced, packaged with the action itself.

    :param fkd: The environment to evaluate on. It is always closed before
        returning, even if the step fails.
    :param action: The action to run.
    :return: A RewardAction pairing the action with its observed reward.
    """
    try:
        # step() returns (observation, reward, done, info); only the reward
        # is of interest here.
        _, reward, _, _ = fkd.step(action)
    finally:
        # Release the environment's backing service unconditionally.
        fkd.close()
    return RewardAction(action=action, reward=reward)
def run_one_environment(self, env: CompilerEnv) -> None:
    """Run random walks in an infinite loop. Returns if the environment ends."""
    try:
        # Keep launching episodes for as long as the runner wants more.
        while True:
            if not self.should_run_one_episode:
                break
            self.total_episode_count += 1
            episode_completed = self.run_one_episode(env)
            if not episode_completed:
                # The environment ended the episode; stop this worker.
                return
    finally:
        # Always release the environment, whatever ended the loop.
        env.close()
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cbench-v1/crc32")

    action = env.action_space.from_string(action_name)
    observation, reward, done, _ = env.step(action)

    assert isinstance(observation, np.ndarray)
    assert observation.shape == (AUTOPHASE_FEATURE_DIM, )
    assert isinstance(reward, float)
    assert isinstance(done, bool)

    # env.close() will raise an error if the service terminated
    # ungracefully. In that case, the "done" flag should have been set.
    try:
        env.close()
    except ServiceError as e:
        assert done, f"Service error was raised when 'done' flag not set: {e}"
def run_random_walk(env: CompilerEnv, step_count: int) -> List[float]:
    """Perform a random walk of the action space.

    :param env: The environment to use.
    :param step_count: The number of steps to run. This value is an upper
        bound - fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    :return: The list of observed rewards.
    """
    rewards = []

    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(f"\n=== Step {humanize.intcomma(step_num)} ===")
            print(f"Action: {env.action_space.names[action_index]} "
                  f"(changed={not info.get('action_had_no_effect')})")
            rewards.append(reward)
            print(f"Reward: {reward}")
            # NOTE(review): reads a private flag on the environment — assumes
            # it marks whether eager observations are enabled; confirm.
            if env._eager_observation:
                print(f"Observation:\n{observation}")
            print(f"Step time: {step_time}")
            if done:
                print("Episode ended by environment")
                break
        env.close()

    # Bug fix: with step_count < 1 the loop never runs and rewards is empty,
    # so the summary below (rewards[0], rewards[-1], max(rewards)) would
    # raise IndexError/ValueError. Return the empty list instead.
    if not rewards:
        print("\nNo steps were run.")
        return rewards

    def reward_delta(reward):
        """Format the relative change of *reward* vs. the initial reward."""
        delta = rewards[0] / max(reward, 1e-9) - 1
        return emph(f"{'+' if delta >= 0 else ''}{delta:.2%}")

    print(
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec).")
    print(f"Init reward: {rewards[0]}")
    print(f"Final reward: {rewards[-1]} ({reward_delta(rewards[-1])})")
    print(f"Max reward: {max(rewards)} ({reward_delta(max(rewards))} "
          f"at step {humanize.intcomma(rewards.index(max(rewards)) + 1)})")
    return rewards
def replay_actions_from_logs(env: CompilerEnv, logdir: Path, benchmark=None) -> None:
    """Replay the best action sequence recorded in a log directory.

    Reads the metadata and best-actions files produced by a previous run,
    configures the environment to match, and replays the recorded actions.

    :param env: The environment to replay on. It is closed on completion.
    :param logdir: Directory containing the metadata and best-actions files.
    :param benchmark: Optional benchmark override; defaults to the benchmark
        recorded in the metadata file.
    """
    best_actions_path = logdir / logs.BEST_ACTIONS_NAME
    meta_path = logdir / logs.METADATA_NAME

    assert best_actions_path.is_file(), f"File not found: {best_actions_path}"
    assert meta_path.is_file(), f"File not found: {meta_path}"

    with open(meta_path, "rb") as f:
        meta = json.load(f)

    # One action per line; skip blank lines.
    with open(best_actions_path) as f:
        actions = [line.strip() for line in f if line.strip()]

    benchmark = benchmark or meta["benchmark"]
    env.reward_space = meta["reward"]
    env.reset(benchmark=benchmark)
    replay_actions(env, actions, logdir)
    env.close()
def run_manual_env(env: CompilerEnv):
    """Run an environment manually.

    The manual environment allows the user to step through the environment,
    selecting observations, rewards, and actions to run as they see fit. This
    is useful for debugging.

    :param env: The environment to run.
    """
    benchmark = None
    if not env.benchmark:
        # Allow the user to choose a benchmark, with the first choice being
        # to select randomly.
        benchmarks = sorted(env.benchmarks)
        # Nothing installed: tell the user where to get benchmarks and bail.
        if not benchmarks:
            print(
                "No benchmarks available see https://facebookresearch.github.io/CompilerGym/getting_started.html#installing-benchmarks"
            )
            print("Exiting...")
            env.close()
            return
        # Strip default benchmark:// protocol.
        for i, benchmark in enumerate(benchmarks):
            if benchmark.startswith("benchmark://"):
                benchmarks[i] = benchmark[len("benchmark://"):]
        # Index 0 is the sentinel "random" entry; leaving benchmark as None
        # signals a random choice to env.reset() below.
        benchmark_index = user_input.read_list_index("Benchmark",
                                                     ["random"] + benchmarks)
        if benchmark_index:
            benchmark = benchmarks[benchmark_index - 1]
        else:
            benchmark = None

    with Timer() as timer:
        # reset() may return an "eager" observation when an observation
        # space is configured on the environment.
        eager_observation = env.reset(benchmark=benchmark)

    print(f"Reset {env.benchmark} environment in {timer}")
    if env.observation_space and eager_observation is not None:
        print(
            f"Observation: {env.observation_space.to_string(eager_observation)}"
        )

    observation_names = sorted(env.observation.spaces.keys())
    reward_names = sorted(env.reward.spaces.keys())
    # Last eager reward seen, used to print the change between steps.
    last_eager_reward: Optional[float] = None
    step_count = 1

    # Interactive loop: prompt for a command, dispatch on the character
    # read, repeat until the user quits or the episode ends.
    while True:
        print(
            f"\nStep {step_count}. Select: [{emph('a')}]ction "
            f"[{emph('o')}]bservation [{emph('r')}]eward "
            f"[{emph('c')}]ommandline [{emph('e')}]nd >>> ",
            end="",
            flush=True,
        )
        # Read characters until one is a recognized command; anything else
        # is silently ignored.
        while True:
            c = user_input.read_char()
            if c == "a":
                print("action", flush=True)
                index = user_input.read_list_index("Actions", ["random"] +
                                                   env.action_space.names)
                step_count += 1
                with Timer() as t:
                    if index == 0:
                        # User selected "random" action.
                        index = env.action_space.sample()
                    else:
                        # Offset to remove "random" action from index.
                        index -= 1
                    eager_observation, eager_reward, done, info = env.step(
                        index)
                # Print the eager observation, if available.
                if env.observation_space and eager_observation is not None:
                    print(
                        f"Observation: {env.observation_space.to_string(eager_observation)}"
                    )
                # Print the eager reward and the diff, if available.
                if env.reward_space and eager_reward is not None:
                    reward_diff = ""
                    if last_eager_reward is not None and eager_reward is not None:
                        reward_diff = (
                            f" (change: {eager_reward - last_eager_reward:.6f})"
                        )
                    print(f"Reward: {eager_reward:.6f}{reward_diff}")
                    last_eager_reward = eager_reward
                print(
                    f"Action {env.action_space.names[index]} in {t}.",
                    " No effect." if info.get("action_had_no_effect") else "",
                    flush=True,
                )
                if done:
                    print("Episode ended by environment: ",
                          info["error_details"])
                    env.close()
                    return
                break
            if c == "o":
                print("observation", flush=True)
                observation_name = user_input.read_list_value(
                    "Observable values", observation_names)
                with Timer() as timer:
                    value = env.observation[observation_name]
                print(
                    env.observation.spaces[observation_name].to_string(value))
                print(f"Observation {observation_name} in {timer}")
                break
            elif c == "r":
                print("reward", flush=True)
                reward_name = user_input.read_list_value(
                    "Rewards", reward_names)
                with Timer(f"Reward {reward_name}"):
                    print(f"{env.reward[reward_name]:.6f}")
                break
            elif c == "c":
                print("commandline")
                print("$", env.commandline(), flush=True)
                break
            elif c == "e":
                print("end", flush=True)
                with Timer("Closed environment"):
                    env.close()
                print("Have a nice day!")
                return
def test_init_benchmark(env: CompilerEnv, benchmark_name: str):
    """Create an environment for each benchmark and close it."""
    env.reset(benchmark=benchmark_name)
    active_benchmark = env.benchmark
    assert active_benchmark == benchmark_name
    env.close()