def do_set_benchmark(self, arg):
    """Set the current benchmark.

    set_benchmark <name> - set the benchmark

    The name should come from the list of benchmarks printed by the command
    list_benchmarks.

    Tab completion will be used if available.

    This command will delete the action history.

    Use '-' for a random benchmark.
    """
    # A lone dash means "pick one for me"; echo the equivalent explicit
    # command so the user can see which benchmark was chosen.
    wants_random = arg == "-"
    if wants_random:
        arg = self.env.datasets.random_benchmark().uri
        print(f"set_benchmark {arg}")

    try:
        chosen = self.env.datasets.benchmark(arg)
        # The recorded actions belong to the previous benchmark.
        self.stack.clear()

        with Timer() as reset_timer:
            first_observation = self.env.reset(benchmark=chosen)
        print(f"Reset {self.env.benchmark} environment in {reset_timer}")

        # Only show an observation when a default observation space is set
        # and the reset actually produced one.
        if self.env.observation_space and first_observation is not None:
            rendered = self.env.observation_space_spec.to_string(first_observation)
            print(f"Observation: {rendered}")

        self.set_prompt()
    except LookupError:
        print("Unknown benchmark, '" + arg + "'")
        print("Benchmarks are listed with command, list_benchmarks")
def get_rewards(
    benchmark: Union[Benchmark, str],
    reward_space: str,
    num_trials: int,
    min_steps: int,
    max_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials episode rewards.

    Trials are repeated until ``num_trials`` rewards have been collected or
    ``max_attempts_multiplier * num_trials`` attempts have been made,
    whichever comes first. A trial whose reward is ``None`` is discarded.

    :param benchmark: The benchmark to run the trials on.
    :param reward_space: Name of the reward space, forwarded to
        ``run_one_trial()``.
    :param num_trials: The number of rewards to collect.
    :param min_steps: Forwarded to ``run_one_trial()``.
    :param max_steps: Forwarded to ``run_one_trial()``.
    :param max_attempts_multiplier: Caps the total number of attempts at this
        multiple of ``num_trials``.
    :return: The collected rewards and the wall time of each successful trial,
        named after the benchmark.
    """
    rewards, runtimes = [], []
    max_attempts = max_attempts_multiplier * num_trials
    num_attempts = 0
    while num_attempts < max_attempts and len(rewards) < num_trials:
        num_attempts += 1
        with env_from_flags(benchmark=benchmark) as env:
            env.observation_space = None
            env.reward_space = None
            env.reset(benchmark=benchmark)
            # Carry the environment's benchmark over to later attempts and to
            # the result name.
            benchmark = env.benchmark
            with Timer() as t:
                reward = run_one_trial(env, reward_space, min_steps, max_steps)
            if reward is not None:
                rewards.append(reward)
                runtimes.append(t.time)

    # Name the result with the tracked benchmark rather than `env.benchmark`:
    # `env` is unbound when no attempt was made (num_trials <= 0) and has
    # already been closed by the `with` block otherwise.
    return SensitivityAnalysisResult(
        name=benchmark, runtimes=np.array(runtimes), rewards=np.array(rewards)
    )
def run(self):
    """Run the policy on each benchmark and append the resulting states.

    Every final state is written to ``FLAGS.leaderboard_results`` and also
    collected in ``self.states``. The loop stops after the current benchmark
    once ``self.alive`` is false.
    """
    results_path = FLAGS.leaderboard_results

    # A header is only wanted when the results file is missing or empty.
    has_existing_rows = (
        Path(results_path).is_file() and os.stat(results_path).st_size != 0
    )
    header = not has_existing_rows

    with CompilerEnvStateWriter(open(results_path, "a"), header=header) as writer:
        for benchmark in self.benchmarks:
            self.env.reset(benchmark=benchmark)
            with Timer() as policy_timer:
                self.policy(self.env)

            # Sanity check that the policy didn't change the expected
            # experimental setup.
            assert self.env.in_episode, "Environment is no longer in an episode"
            assert self.env.benchmark and (
                self.env.benchmark == benchmark
            ), "Policy changed environment benchmark"
            assert self.env.reward_space, "Policy unset environment reward space"
            assert (
                self.env.reward_space.name == "IrInstructionCountOz"
            ), "Policy changed environment reward space"

            # Report the time spent inside the policy, not the walltime
            # recorded in the environment state.
            final_state = self.env.state.copy()
            final_state.walltime = policy_timer.time

            writer.write_state(final_state, flush=True)
            self.states.append(final_state)

            if not self.alive:
                return
def get_rewards(
    action: int,
    action_name: str,
    reward_space: str,
    num_trials: int,
    max_warmup_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials immediate rewards.

    A fresh environment is created for every attempt. Attempts stop once
    ``num_trials`` rewards were collected or the attempt budget of
    ``max_attempts_multiplier * num_trials`` is used up. Trials returning
    ``None`` are dropped.

    :return: The rewards and per-trial wall times, named after the action.
    """
    benchmark = benchmark_from_flags()
    attempt_budget = max_attempts_multiplier * num_trials

    collected_rewards = []
    collected_runtimes = []
    attempts_made = 0
    while attempts_made < attempt_budget and len(collected_rewards) < num_trials:
        attempts_made += 1
        with env_from_flags(benchmark=benchmark) as env:
            # Disable the default spaces before resetting.
            env.observation_space = None
            env.reward_space = None
            env.reset(benchmark=benchmark)
            with Timer() as trial_timer:
                reward = run_one_trial(env, reward_space, action, max_warmup_steps)
            if reward is None:
                continue
            collected_rewards.append(reward)
            collected_runtimes.append(trial_timer.time)

    return SensitivityAnalysisResult(
        name=action_name,
        runtimes=np.array(collected_runtimes),
        rewards=np.array(collected_rewards),
    )
def flush(self) -> None:
    """Flush the buffered steps and observations to database.

    Does nothing when no steps are buffered. Otherwise both buffers are
    inserted, emptied, and the transaction is committed.
    """
    pending_steps = len(self.step_buffer)
    pending_observations = len(self.observations_buffer)

    # Nothing to flush.
    if pending_steps == 0:
        return

    with Timer() as flush_time:
        # House keeping notice: Keep these statements in sync with record().
        self.cursor.executemany(
            "INSERT OR IGNORE INTO States VALUES (?, ?, ?, ?, ?)",
            self.step_buffer,
        )
        # Each observation row is its key followed by the buffered values.
        observation_rows = (
            (key, *values) for key, values in self.observations_buffer.items()
        )
        self.cursor.executemany(
            "INSERT OR IGNORE INTO Observations VALUES (?, ?, ?, ?, ?, ?)",
            observation_rows,
        )
        self.step_buffer = []
        self.observations_buffer = {}
        self.connection.commit()

    since_last_commit = humanize_duration(time() - self.last_commit)
    logging.info(
        "Wrote %d state records and %d observations in %s. Last flush %s ago",
        pending_steps,
        pending_observations,
        flush_time,
        since_last_commit,
    )
    self.last_commit = time()
def do_greedy(self, arg):
    """Do some greedy steps.

    All actions are tried and the one with the biggest positive reward is
    accepted.

    An argument, if given, should be the number of steps to take.

    The search will try to improve the default reward. Please call
    set_default_reward if needed.
    """
    if not self.env.reward_space:
        print("No default reward set. Call set_default_reward")
        return

    # A missing or non-numeric argument means a single step.
    try:
        num_steps = max(1, int(arg))
    except ValueError:
        num_steps = 1

    with Timer() as timer:
        for step in range(1, num_steps + 1):
            # The first entry returned by get_action_rewards() is the
            # candidate for this step.
            candidate = self.get_action_rewards()[0]
            improves = (
                (not candidate.done)
                and (candidate.reward is not None)
                and (candidate.reward > 0)
            )
            if not improves:
                print(f"Step: {step} Selected no action.")
                if step < num_steps:
                    print("Greedy search stopping early.")
                break

            self.env.step(candidate.action_index)
            self.stack.append(candidate)
            print(
                f"Step: {step} Selected action: {candidate.action_name} Reward: {candidate.reward:.6f}"
            )
    print(f"Greedy {step} steps in {timer}")
def validate_state(env: CompilerEnv, state: CompilerEnvState) -> ValidationResult:
    """Validate a :class:`CompilerEnvState <compiler_gym.envs.CompilerEnvState>`.

    The state's commandline is replayed on a freshly reset environment. If
    the replay succeeds, the reward is compared against the recorded one (for
    deterministic reward spaces) and any registered semantics validation
    callback for the benchmark is run.

    :param env: A compiler environment.
    :param state: The environment state to validate.
    :return: A :class:`ValidationResult <compiler_gym.ValidationResult>`
        instance.
    :raises ValueError: If the state has a reward but the environment has no
        reward space.
    """
    if state.reward is not None and env.reward_space is None:
        raise ValueError("Reward space not specified")

    errors = []
    outcome = {
        "state": state,
        "actions_replay_failed": False,
        "reward_validated": False,
        "reward_validation_failed": False,
        "benchmark_semantics_validated": False,
        "benchmark_semantics_validation_failed": False,
    }

    with Timer() as walltime:
        env.reset(benchmark=state.benchmark)
        try:
            reward = _llvm_replay_commandline(env, state.commandline)
        except (ValueError, OSError) as e:
            # Without a successful replay none of the later checks apply.
            outcome["actions_replay_failed"] = True
            errors.append(str(e))
        else:
            if state.reward is not None and env.reward_space.deterministic:
                outcome["reward_validated"] = True
                # If reward deviates from the expected amount record the
                # error but continue with the remainder of the validation.
                rewards_agree = math.isclose(
                    reward, state.reward, rel_tol=1e-5, abs_tol=1e-10
                )
                if not rewards_agree:
                    outcome["reward_validation_failed"] = True
                    errors.append(
                        f"Expected reward {state.reward:.4f} but received reward {reward:.4f}"
                    )

            semantics_check = LLVM_BENCHMARK_VALIDATION_CALLBACKS.get(state.benchmark)
            if semantics_check:
                outcome["benchmark_semantics_validated"] = True
                semantics_error = semantics_check(env)
                if semantics_error:
                    outcome["benchmark_semantics_validation_failed"] = True
                    errors.append(semantics_error)

    return ValidationResult(
        walltime=walltime.time,
        error_details="\n".join(errors),
        **outcome,
    )
def train(dataset, data_loader, model, num_epoch, device):
    """Train ``model`` for ``num_epoch`` epochs with an Adam optimizer.

    Each epoch makes one pass with the dataset switched to its "train"
    distribution, followed by one pass on "dev" with ``train=False``.
    """
    optimizer = torch.optim.Adam(model.parameters())
    for epoch in range(num_epoch):
        epoch_number = epoch + 1
        epoch_label = (
            f"Epoch {epoch_number} of {num_epoch} ({epoch_number / num_epoch:.1%})"
        )
        with Timer(epoch_label):
            # Training pass.
            dataset.set_distribution_type("train")
            dataset_looper(epoch, data_loader, model, device, optimizer)

            # Evaluation pass: no optimizer is supplied.
            dataset.set_distribution_type("dev")
            dataset_looper(epoch, data_loader, model, device, train=False)
def do_back(self, arg):
    """Undo the last action, if any"""
    if not self.stack:
        print("No actions to undo")
        return

    # Drop the newest action, then replay whatever remains on the stack.
    undone = self.stack.pop()
    with Timer(f"Undid {undone.action_name}"):
        self.rerun_stack()
def run_random_walk(env: CompilerEnv, step_count: int) -> List[float]:
    """Perform a random walk of the action space.

    The environment is reset, stepped with randomly sampled actions, and then
    closed. Progress and a final summary are printed to stdout.

    :param env: The environment to use. It is closed once the walk ends.
    :param step_count: The number of steps to run. This value is an upper
        bound - fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    :return: The list of observed rewards. Empty if no step was taken.
    """
    rewards = []

    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(f"\n=== Step {humanize.intcomma(step_num)} ===")
            print(
                f"Action:       {env.action_space.names[action_index]} "
                f"(changed={not info.get('action_had_no_effect')})"
            )
            rewards.append(reward)
            print(f"Reward:       {reward}")
            if env._eager_observation:
                print(f"Observation:\n{observation}")
            print(f"Step time:    {step_time}")
            if done:
                print("Episode ended by environment")
                break
        env.close()

    print(
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec)."
    )

    # With step_count <= 0 no reward was observed; rewards[0] and
    # max(rewards) would raise, so there is nothing further to report.
    if not rewards:
        return rewards

    def reward_delta(reward):
        # Ratio of the first reward to `reward`, minus one. The denominator
        # is clamped to keep the division defined for non-positive rewards.
        delta = rewards[0] / max(reward, 1e-9) - 1
        return emph(f"{'+' if delta >= 0 else ''}{delta:.2%}")

    best_reward = max(rewards)
    print(f"Init reward:  {rewards[0]}")
    print(f"Final reward: {rewards[-1]} ({reward_delta(rewards[-1])})")
    print(
        f"Max reward:   {best_reward} ({reward_delta(best_reward)} "
        f"at step {humanize.intcomma(rewards.index(best_reward) + 1)})"
    )
    return rewards
def get_runtimes(op: Callable[[], Any], n: int):
    """Run `n` repetitions of function `op`, ignoring any errors.

    :param op: A zero-argument callable to time.
    :param n: How many times to call it.
    :return: The wall time of each call that completed without raising, so
        the list may hold fewer than `n` entries.
    """
    successful_times = []
    for _ in range(n):
        try:
            with Timer() as op_timer:
                op()
            successful_times.append(op_timer.time)
        except Exception as error:  # pylint: disable=broad-except
            # A failing repetition is logged and contributes no timing.
            logger.warning("Op failed: %s", error)
    return successful_times
def get_step_times(env: CompilerEnv, num_steps: int, batched=False):
    """Measure the wall time of ``num_steps`` random environment steps.

    :param env: The environment to step.
    :param num_steps: The number of step times to return.
    :param batched: If true, all actions are submitted in one ``step()`` call
        and its time is divided evenly among the steps.
    :return: A list of ``num_steps`` step times.
    """
    if batched:
        # Run all actions in a single step(). If the batch ends the episode,
        # reset and try again with a new batch.
        while True:
            actions = [env.action_space.sample() for _ in range(num_steps)]
            with Timer() as batch_timer:
                _, _, done, _ = env.step(actions)
            if not done:
                return [batch_timer.time / num_steps] * num_steps
            env.reset()

    # Run each action as a step(). Steps that end the episode are not
    # recorded; the environment is reset instead.
    step_times = []
    while len(step_times) < num_steps:
        with Timer() as step_timer:
            _, _, done, _ = env.step(env.action_space.sample())
        if done:
            env.reset()
            continue
        step_times.append(step_timer.time)
    return step_times
def run_random_walk(env: CompilerEnv, step_count: int) -> None:
    """Perform a random walk of the action space.

    The environment is reset and stepped with randomly sampled actions.
    Progress and a final summary are printed to stdout.

    :param env: The environment to use.
    :param step_count: The number of steps to run. This value is an upper
        bound - fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    """
    rewards = []

    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(
                f"\n=== Step {humanize.intcomma(step_num)} ===\n"
                f"Action:       {env.action_space.names[action_index]} "
                f"(changed={not info.get('action_had_no_effect')})\n"
                f"Reward:       {reward}"
            )
            rewards.append(reward)
            if env.observation_space:
                print(f"Observation:\n{observation}")
            print(f"Step time:    {step_time}")
            if done:
                print("Episode ended by environment")
                break

    def reward_percentage(reward, rewards):
        # Share of the total reward contributed by `reward`; a zero total is
        # reported as a plain 0 to avoid dividing by zero.
        if sum(rewards) == 0:
            return 0
        percentage = reward / sum(rewards)
        return emph(f"{'+' if percentage >= 0 else ''}{percentage:.2%}")

    summary = (
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec).\n"
        f"Total reward: {sum(rewards)}"
    )
    # max() raises on an empty list, so the maximum is only reported when at
    # least one step was taken (i.e. step_count > 0).
    if rewards:
        best_reward = max(rewards)
        summary += (
            f"\nMax reward:   {best_reward} ({reward_percentage(best_reward, rewards)} "
            f"at step {humanize.intcomma(rewards.index(best_reward) + 1)})"
        )
    print(summary)
def main(argv):
    """Main entry point.

    Parses the flags, builds the environment from them, and hands it to an
    interactive ``CompilerGymShell``.
    """
    argv = FLAGS(argv)
    # Anything left after flag parsing besides the program name is an error.
    if len(argv) != 1:
        unparsed = argv[1:]
        raise app.UsageError(f"Unknown command line arguments: {unparsed}")

    with Timer("Initialized environment"):
        benchmark = benchmark_from_flags()
        env = env_from_flags(benchmark)

    CompilerGymShell(env).cmdloop()
def do_require_dataset(self, arg):
    """Require dataset

    The argument is the name of the dataset to require.
    """
    # Reject names that get_datasets() does not list.
    if not self.get_datasets().count(arg):
        print(f"Unknown dataset, '{arg}'")
        print("Available datasets are listed with command, list_available_datasets")
        return

    with Timer(f"Downloaded dataset {arg}"):
        require(self.env, arg)
    self.init_benchmarks()
    # FIXME CHRIS, why can't I get it to update the list of benchmarks?
    # I have to restart
    print("Application must be restarted to make changes visible.")
def do_hill_climb(self, arg):
    """Do some steps of hill climbing.

    A random action is taken, but only accepted if it has a positive reward.

    An argument, if given, should be the number of steps to take.

    The search will try to improve the default reward. Please call
    set_default_reward if needed.
    """
    if not self.env.benchmark:
        print("No benchmark set, please call the set_benchmark command")
        return

    if not self.env.reward_space:
        print("No default reward set. Call set_default_reward")
        return

    # A missing or non-numeric argument means a single step.
    try:
        num_steps = max(1, int(arg))
    except ValueError:
        num_steps = 1

    num_accepted = 0
    cum_reward = 0
    with Timer() as timer:
        for i in range(num_steps):
            index = random.randrange(self.env.action_space.n)
            action = self.env.action_space.names[index]

            observation, reward, done, info = self.env.step(index)

            accept = not done and (reward is not None) and (reward > 0)
            if accept:
                # Append the history element
                hist = ActionHistoryElement(
                    action, index, observation, reward, done, info
                )
                self.stack.append(hist)
                num_accepted += 1
                cum_reward += reward
            else:
                # Basically undo
                self.rerun_stack()

            # The acceptance test above allows for a missing reward, so the
            # report must not apply a float format to None.
            reward_text = "None" if reward is None else f"{reward:.6f}"
            print(
                f"Step: {i+1} Action: {action} Reward: {reward_text} Accept: {accept}"
            )
            if done:
                # Use .get() so a missing "error_details" entry does not
                # raise KeyError.
                print(
                    "Episode ended by environment: ",
                    info.get("error_details", ""),
                )
    print(
        f"Hill climb complete in {timer}. Accepted {num_accepted} of {num_steps} steps for total reward of {cum_reward}."
    )
def main(argv):
    """Train a Q-table on the benchmark given by flags, then roll it out."""
    benchmark = benchmark_from_flags()
    assert benchmark, "You must specify a benchmark using the --benchmark flag"

    # The Q table starts out empty and is filled in by train().
    q_table: Dict[StateActionTuple, float] = {}

    with gym.make("llvm-ic-v0", benchmark=benchmark) as env:
        env.observation_space = "Autophase"

        with Timer("Constructing Q-table"):
            train(q_table, env)

        # Rollout resulting policy.
        rollout(q_table, env, printout=True)
def do_reward(self, arg):
    """Show an reward value

    reward <name> - show the named reward

    The name should come from the list of rewards printed by the command
    list_rewards.

    Tab completion will be used if available.
    """
    # With no argument, fall back to the default reward space if one is set.
    if arg == "" and self.env.reward_space:
        arg = self.env.reward_space.id

    if not self.rewards.count(arg):
        print(f"Unknown reward, '{arg}'")
        print("Rewards are listed with command, list_rewards")
        return

    with Timer(f"Reward {arg}"):
        reward_value = self.env.reward[arg]
        print(f"{reward_value:.6f}")
def do_set_default_reward(self, arg):
    """Set the default reward space

    set_default_reward <name> - set the named reward

    The name should come from the list of rewards printed by the command
    list_rewards.

    Tab completion will be used if available.

    With no argument it will set to None.

    This command will rerun the actions on the stack.
    """
    arg = arg.strip()

    # A non-empty name must be one of the known rewards.
    if arg and not self.rewards.count(arg):
        print(f"Unknown reward, '{arg or 'None'}'")
        print("Rewards are listed with command, list_rewards")
        return

    with Timer(f"Reward {arg}"):
        # An empty argument clears the default reward space.
        self.env.reward_space = arg or None
        self.rerun_stack(check_rewards=False)
def do_set_default_observation(self, arg):
    """Set the default observation space

    set_default_observation <name> - set the named observation

    The name should come from the list of observations printed by the command
    list_observations.

    Tab completion will be used if available.

    With no argument it will set to None.

    This command will rerun the actions on the stack.
    """
    arg = arg.strip()

    # A non-empty name must be one of the known observations.
    if arg and not self.observations.count(arg):
        print(f"Unknown observation, '{arg or 'None'}'")
        print("Observations are listed with command, list_observations")
        return

    with Timer() as timer:
        # An empty argument clears the default observation space.
        self.env.observation_space = arg or None
        self.rerun_stack(check_rewards=False)
    print(f"Observation {arg} in {timer}")
def do_observation(self, arg):
    """Show an observation value

    observation <name> - show the named observation

    The name should come from the list of observations printed by the command
    list_observations.

    Tab completion will be used if available.
    """
    # With no argument, fall back to the default observation space if set.
    if arg == "" and self.env.observation_space:
        arg = self.env.observation_space_spec.id

    if not self.observations.count(arg):
        print(f"Unknown observation, '{arg}'")
        print("Observations are listed with command, list_observations")
        return

    # Only computing the observation is timed, not rendering it.
    with Timer() as timer:
        value = self.env.observation[arg]
    space = self.env.observation.spaces[arg]
    print(space.to_string(value))
    print(f"Observation {arg} in {timer}")
def main(argv):
    """Main entry point.

    With ``--ls_benchmark`` the sorted benchmark names are printed and the
    program exits. Otherwise the manual environment loop is started.
    """
    argv = FLAGS(argv)
    if len(argv) != 1:
        unparsed = argv[1:]
        raise app.UsageError(f"Unknown command line arguments: {unparsed}")

    if FLAGS.ls_benchmark:
        # List-only mode: print the benchmarks and quit.
        listing_env = env_from_flags(benchmark_from_flags())
        benchmark_names = sorted(listing_env.benchmarks)
        print("\n".join(benchmark_names))
        listing_env.close()
        return

    with Timer("Initialized environment"):
        benchmark = benchmark_from_flags()
        env = env_from_flags(benchmark)

    run_manual_env(env)
def do_try_all_actions(self, args):
    """Tries all actions from this position and reports the results in sorted order by reward"""
    if not self.env.reward_space:
        print("No default reward set. Call set_default_reward")
        return

    with Timer("Got actions"):
        candidates = self.get_action_rewards()

    # One table row per candidate, in the order get_action_rewards() gave.
    table = [
        (
            candidate.action_name,
            candidate.has_effect(),
            candidate.done,
            f"{candidate.reward:.6f}",
        )
        for candidate in candidates
    ]
    column_names = ["Action", "Effect", "Done", "Reward"]
    print(tabulate(table, headers=column_names, tablefmt="presto"))
def main(argv):
    """Main entry point.

    With ``--ls_benchmark`` the sorted benchmark names are printed and the
    program exits. Otherwise an interactive ``CompilerGymShell`` is started.
    """
    argv = FLAGS(argv)
    if len(argv) != 1:
        unparsed = argv[1:]
        raise app.UsageError(f"Unknown command line arguments: {unparsed}")

    if FLAGS.ls_benchmark:
        # List-only mode: print the benchmarks and quit.
        listing_env = env_from_flags(benchmark_from_flags())
        benchmark_names = sorted(listing_env.benchmarks)
        print("\n".join(benchmark_names))
        listing_env.close()
        return

    with Timer("Initialized environment"):
        # FIXME Chris, I don't seem to actually get a benchmark
        benchmark = benchmark_from_flags()
        env = env_from_flags(benchmark)

    CompilerGymShell(env).cmdloop()
def __call__(self, env: CompilerEnv, seed: int = 0xCC) -> CompilerEnvState:
    """Autotune the given environment.

    :param env: The environment to autotune.
    :param seed: The random seed for the autotuner.
    :returns: A CompilerEnvState tuple describing the autotuning result. Its
        walltime is the time spent inside ``self.autotune()``.
    """
    # Run the autotuner in a temporary working directory and capture the
    # stdout/stderr.
    scratch_parent = transient_cache_path(".")
    with tempfile.TemporaryDirectory(dir=scratch_parent, prefix="autotune-") as scratch:
        with temporary_working_directory(Path(scratch)), capture_output():
            with Timer() as tuning_timer:
                self.autotune(env, seed=seed, **self.autotune_kwargs)

    return CompilerEnvState(
        benchmark=env.benchmark.uri,
        commandline=env.commandline(),
        walltime=tuning_timer.time,
        reward=self.optimization_target.final_reward(env),
    )
def run_manual_env(env: CompilerEnv):
    """Run an environment manually.

    The manual environment allows the user to step through the environment,
    selection observations, rewards, and actions to run as they see fit. This
    is useful for debugging.

    The environment is closed when the user ends the session, when the
    episode is ended by the environment, or when no benchmarks are available.

    :param env: The environment to run.
    """
    benchmark = None
    if not env.benchmark:
        # Allow the user to choose a benchmark, with the first choice being
        # to select randomly.
        choices = sorted(env.benchmarks)
        if not choices:
            print(
                "No benchmarks available see https://facebookresearch.github.io/CompilerGym/getting_started.html#installing-benchmarks"
            )
            print("Exiting...")
            env.close()
            return

        # Strip default benchmark:// protocol.
        scheme = "benchmark://"
        choices = [
            name[len(scheme):] if name.startswith(scheme) else name
            for name in choices
        ]

        selection = user_input.read_list_index("Benchmark", ["random"] + choices)
        # Entry 0 is "random" and maps to benchmark=None; the other entries
        # are offset by one.
        benchmark = choices[selection - 1] if selection else None

    with Timer() as reset_timer:
        eager_observation = env.reset(benchmark=benchmark)

    print(f"Reset {env.benchmark} environment in {reset_timer}")
    if env.observation_space and eager_observation is not None:
        print(f"Observation: {env.observation_space.to_string(eager_observation)}")

    observation_names = sorted(env.observation.spaces.keys())
    reward_names = sorted(env.reward.spaces.keys())
    last_eager_reward: Optional[float] = None
    step_count = 1

    while True:
        print(
            f"\nStep {step_count}. Select: [{emph('a')}]ction "
            f"[{emph('o')}]bservation [{emph('r')}]eward "
            f"[{emph('c')}]ommandline [{emph('e')}]nd >>> ",
            end="",
            flush=True,
        )
        # Keep reading characters until one of the menu keys is pressed.
        while True:
            key = user_input.read_char()
            if key == "a":
                print("action", flush=True)
                index = user_input.read_list_index(
                    "Actions", ["random"] + env.action_space.names
                )
                step_count += 1
                with Timer() as step_timer:
                    if index == 0:
                        # User selected "random" action.
                        index = env.action_space.sample()
                    else:
                        # Offset to remove "random" action from index.
                        index -= 1
                    eager_observation, eager_reward, done, info = env.step(index)

                # Print the eager observation, if available.
                if env.observation_space and eager_observation is not None:
                    print(
                        f"Observation: {env.observation_space.to_string(eager_observation)}"
                    )

                # Print the eager reward and the diff, if available.
                if env.reward_space and eager_reward is not None:
                    reward_diff = ""
                    if last_eager_reward is not None:
                        reward_diff = (
                            f" (change: {eager_reward - last_eager_reward:.6f})"
                        )
                    print(f"Reward: {eager_reward:.6f}{reward_diff}")
                    last_eager_reward = eager_reward

                no_effect_note = (
                    " No effect." if info.get("action_had_no_effect") else ""
                )
                print(
                    f"Action {env.action_space.names[index]} in {step_timer}.",
                    no_effect_note,
                    flush=True,
                )
                if done:
                    print("Episode ended by environment: ", info["error_details"])
                    env.close()
                    return
                break
            elif key == "o":
                print("observation", flush=True)
                observation_name = user_input.read_list_value(
                    "Observable values", observation_names
                )
                with Timer() as observation_timer:
                    value = env.observation[observation_name]
                print(env.observation.spaces[observation_name].to_string(value))
                print(f"Observation {observation_name} in {observation_timer}")
                break
            elif key == "r":
                print("reward", flush=True)
                reward_name = user_input.read_list_value("Rewards", reward_names)
                with Timer(f"Reward {reward_name}"):
                    print(f"{env.reward[reward_name]:.6f}")
                break
            elif key == "c":
                print("commandline")
                print("$", env.commandline(), flush=True)
                break
            elif key == "e":
                print("end", flush=True)
                with Timer("Closed environment"):
                    env.close()
                print("Have a nice day!")
                return
def do_reset(self, arg):
    """Clear the stack of any actions and reset"""
    # Forget the recorded actions before resetting.
    self.stack.clear()

    with Timer("Reset"):
        self.env.reset()

    self.set_prompt()
def from_agent(
    cls, env: CompilerEnv, agent, runtime: bool = True, runtimes_count: int = 30
):
    """Run ``agent`` on ``env`` for one episode and summarize the outcome.

    :param env: The environment to run inference on. Its reward space is
        cleared, and it is reset again if runtimes are measured.
    :param agent: An object providing ``compute_action(observation)``.
    :param runtime: Whether to measure runtimes when the benchmark reports
        itself as runnable.
    :param runtimes_count: Number of runtime samples requested per
        observation.
    :return: An instance of ``cls`` populated with the collected metrics.
        Runtime lists stay empty if runtimes were not measured.
    """

    def observe(name):
        # Read a named observation from the unwrapped environment.
        return env.unwrapped.observation[name]

    # We calculate our own reward at the end, no need for incremental
    # rewards during inference.
    env.reward_space = None

    # Run inference on the environment.
    observation, done = env.reset(), False
    with Timer() as inference_timer:
        while not done:
            action = agent.compute_action(observation)
            observation, _, done, _ = env.step(action)

    instruction_count_init = observe("IrInstructionCountO0")
    instruction_count_final = observe("IrInstructionCount")
    instruction_count_oz = observe("IrInstructionCountOz")
    object_size_init = observe("ObjectTextSizeO0")
    object_size_final = observe("ObjectTextSizeBytes")
    object_size_oz = observe("ObjectTextSizeOz")

    runtimes_init = []
    runtimes_o3 = []
    runtimes_final = []

    try:
        if runtime and observe("IsRunnable"):
            # Final state first, while the episode is still in place.
            env.send_param(
                "llvm.set_runtimes_per_observation_count", str(runtimes_count)
            )
            observe("Runtime")  # warmup
            runtimes_final = observe("Runtime").tolist()
            assert (
                len(runtimes_final) == runtimes_count
            ), f"{len(runtimes_final)} != {runtimes_count}"

            # Then the runtimes right after a reset.
            env.reset()
            env.send_param(
                "llvm.set_runtimes_per_observation_count", str(runtimes_count)
            )
            observe("Runtime")  # warmup
            runtimes_init = observe("Runtime").tolist()
            assert (
                len(runtimes_init) == runtimes_count
            ), f"{len(runtimes_init)} != {runtimes_count}"

            # Finally the -O3 baseline.
            env.send_param("llvm.apply_baseline_optimizations", "-O3")
            observe("Runtime")  # warmup
            runtimes_o3 = observe("Runtime").tolist()
            assert (
                len(runtimes_o3) == runtimes_count
            ), f"{len(runtimes_o3)} != {runtimes_count}"
    except Exception as e:  # pylint: disable=broad-except
        # Runtime measurement is best-effort; keep whatever was collected.
        logger.warning("Failed to compute runtime: %s", e)

    # Ratios of the baseline to the final value. Denominators are clamped to
    # at least 1, and empty runtime lists are treated as [0].
    instruction_count_reduction = instruction_count_oz / max(
        instruction_count_final, 1
    )
    object_size_reduction = object_size_oz / max(object_size_final, 1)
    runtime_reduction = np.median(runtimes_o3 or [0]) / max(
        np.median(runtimes_final or [0]), 1
    )

    return cls(
        benchmark=env.benchmark.uri,
        inference_walltime_seconds=inference_timer.time,
        commandline=env.commandline(),
        episode_len=len(env.actions),
        instruction_count_init=instruction_count_init,
        instruction_count_final=instruction_count_final,
        instruction_count_oz=instruction_count_oz,
        instruction_count_reduction=instruction_count_reduction,
        object_size_init=object_size_init,
        object_size_final=object_size_final,
        object_size_oz=object_size_oz,
        object_size_reduction=object_size_reduction,
        runtimes_init=runtimes_init,
        runtimes_final=runtimes_final,
        runtimes_o3=runtimes_o3,
        runtime_reduction=runtime_reduction,
    )
def main(argv):
    """Compare random-search throughput of threads against processes.

    For each worker count, the same number of random-search workers is run
    first as threads and then as processes. One CSV row per worker count is
    written to ``FLAGS.logfile``.
    """
    assert len(argv) == 1, f"Unknown arguments: {argv[1:]}"

    columns = (
        "nproc",
        "episodes_per_worker",
        "steps_per_episode",
        "total_episodes",
        "thread_steps_per_second",
        "process_steps_per_second",
        "thread_walltime",
        "process_walltime",
    )
    worker_args = (FLAGS.num_episodes, FLAGS.num_steps)
    # Always measure a single worker, then every multiple of the increment.
    worker_counts = [
        1,
        *range(FLAGS.nproc_increment, FLAGS.max_nproc + 1, FLAGS.nproc_increment),
    ]

    with open(FLAGS.logfile, "w") as f:
        print(*columns, sep=",", file=f)

        for nproc in worker_counts:
            # Perform the same `nproc * num_episodes` random trajectories first
            # using threads, then using processes.
            threads = [
                Thread(target=run_random_search, args=worker_args)
                for _ in range(nproc)
            ]
            with Timer(f"Run {nproc} threaded workers") as thread_time:
                for thread in threads:
                    thread.start()
                for thread in threads:
                    thread.join()

            processes = [
                Process(target=run_random_search, args=worker_args)
                for _ in range(nproc)
            ]
            with Timer(f"Run {nproc} process workers") as process_time:
                for process in processes:
                    process.start()
                for process in processes:
                    process.join()

            total_steps = FLAGS.num_episodes * FLAGS.num_steps * nproc
            row = (
                nproc,
                FLAGS.num_episodes,
                FLAGS.num_steps,
                FLAGS.num_episodes * nproc,
                total_steps / thread_time.time,
                total_steps / process_time.time,
                thread_time.time,
                process_time.time,
            )
            print(*row, sep=",", file=f, flush=True)
def main(argv):
    """Evaluate the policy on the test dataset and report live progress.

    The benchmarks are evaluated by a background ``_EvalPolicyWorker`` while
    this function prints a progress display once per second. With
    ``FLAGS.resume``, benchmarks already present in the results file are
    skipped. Unless interrupted, the results are validated afterwards when
    ``FLAGS.validate`` is set.

    :param argv: Command line arguments; only the program name is accepted.
    """
    # Report the arguments after the program name, i.e. the unknown ones.
    assert len(argv) == 1, f"Unknown args: {argv[1:]}"
    assert FLAGS.n > 0, "n must be > 0"

    with gym.make("llvm-ic-v0") as env:
        # Stream verbose CompilerGym logs to file.
        logger = logging.getLogger("compiler_gym")
        logger.setLevel(logging.DEBUG)
        log_handler = logging.FileHandler(FLAGS.leaderboard_logfile)
        logger.addHandler(log_handler)
        logger.propagate = False

        print(f"Writing results to {FLAGS.leaderboard_results}")
        print(f"Writing logs to {FLAGS.leaderboard_logfile}")

        # Build the list of benchmarks to evaluate.
        benchmarks = env.datasets[FLAGS.test_dataset].benchmark_uris()
        if FLAGS.max_benchmarks:
            benchmarks = islice(benchmarks, FLAGS.max_benchmarks)
        benchmarks = list(benchmarks)

        # Repeat the searches for the requested number of iterations.
        benchmarks *= FLAGS.n
        total_count = len(benchmarks)

        # If we are resuming from a previous job, read the states that have
        # already been processed and remove those benchmarks from the list
        # of benchmarks to evaluate.
        init_states = []
        if FLAGS.resume and Path(FLAGS.leaderboard_results).is_file():
            with CompilerEnvStateReader(open(FLAGS.leaderboard_results)) as reader:
                for state in reader:
                    init_states.append(state)
                    if state.benchmark in benchmarks:
                        benchmarks.remove(state.benchmark)

        # Run the benchmark loop in background so that we can asynchronously
        # log progress.
        worker = _EvalPolicyWorker(env, benchmarks, policy, init_states)
        worker.start()
        timer = Timer().reset()
        try:
            print(
                f"=== Evaluating policy on "
                f"{humanize.intcomma(total_count)} "
                f"{FLAGS.test_dataset} benchmarks ==="
                "\n\n"  # Blank lines will be filled below
            )
            while worker.is_alive():
                done_count = len(worker.states)
                remaining_count = total_count - done_count
                elapsed = timer.time
                gmean_reward = geometric_mean([s.reward for s in worker.states])
                # Fall back to the elapsed time when the mean walltime is
                # falsy.
                mean_walltime = (
                    arithmetic_mean([s.walltime for s in worker.states]) or elapsed
                )
                # Avoid dividing by zero when there are no benchmarks at all.
                done_fraction = done_count / total_count if total_count else 1.0
                print(
                    "\r\033[2A"
                    "\033[K"
                    f"Runtime: {humanize_duration_hms(elapsed)}. "
                    f"Estimated completion: {humanize_duration_hms(mean_walltime * remaining_count)}. "
                    f"Completed: {humanize.intcomma(done_count)} / {humanize.intcomma(total_count)} "
                    f"({done_fraction:.1%})."
                    "\n\033[K"
                    f"Current mean walltime: {mean_walltime:.3f}s / benchmark."
                    "\n\033[K"
                    f"Current geomean reward: {gmean_reward:.4f}.",
                    flush=True,
                    end="",
                )
                sleep(1)
        except KeyboardInterrupt:
            print("\nkeyboard interrupt", flush=True)
            worker.alive = False
            # User interrupt, don't validate.
            FLAGS.validate = False

    if FLAGS.validate:
        FLAGS.env = "llvm-ic-v0"
        validate(["argv0", FLAGS.leaderboard_results])