Example #1
    def do_set_benchmark(self, arg):
        """Set the current benchmark.
        set_benchmark <name> - set the benchmark
        The name should come from the list of benchmarks printed by the command list_benchmarks.
        Tab completion will be used if available.
        This command will delete the action history.
        Use '-' for a random benchmark.
        """
        if arg == "-":
            arg = self.env.datasets.random_benchmark().uri
            print(f"set_benchmark {arg}")

        try:
            benchmark = self.env.datasets.benchmark(arg)
            self.stack.clear()

            # Set the current benchmark
            with Timer() as timer:
                observation = self.env.reset(benchmark=benchmark)
            print(f"Reset {self.env.benchmark} environment in {timer}")

            if self.env.observation_space and observation is not None:
                print(
                    f"Observation: {self.env.observation_space_spec.to_string(observation)}"
                )

            self.set_prompt()
        except LookupError:
            print("Unknown benchmark, '" + arg + "'")
            print("Benchmarks are listed with command, list_benchmarks")
Example #2
def get_rewards(
    benchmark: Union[Benchmark, str],
    reward_space: str,
    num_trials: int,
    min_steps: int,
    max_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials episode rewards."""
    rewards, runtimes = [], []
    num_attempts = 0
    while (num_attempts < max_attempts_multiplier * num_trials
           and len(rewards) < num_trials):
        num_attempts += 1
        with env_from_flags(benchmark=benchmark) as env:
            env.observation_space = None
            env.reward_space = None
            env.reset(benchmark=benchmark)
            benchmark = env.benchmark
            with Timer() as t:
                reward = run_one_trial(env, reward_space, min_steps, max_steps)
            if reward is not None:
                rewards.append(reward)
                runtimes.append(t.time)

    return SensitivityAnalysisResult(name=env.benchmark,
                                     runtimes=np.array(runtimes),
                                     rewards=np.array(rewards))
Example #3
    def run(self):
        # Determine if we need to print a header.
        header = (not Path(FLAGS.leaderboard_results).is_file()
                  or os.stat(FLAGS.leaderboard_results).st_size == 0)
        with CompilerEnvStateWriter(open(FLAGS.leaderboard_results, "a"),
                                    header=header) as writer:
            for benchmark in self.benchmarks:
                self.env.reset(benchmark=benchmark)
                with Timer() as timer:
                    self.policy(self.env)

                # Sanity check that the policy didn't change the expected
                # experimental setup.
                assert self.env.in_episode, "Environment is no longer in an episode"
                assert self.env.benchmark and (
                    self.env.benchmark
                    == benchmark), "Policy changed environment benchmark"
                assert self.env.reward_space, "Policy unset environment reward space"
                assert (self.env.reward_space.name == "IrInstructionCountOz"
                        ), "Policy changed environment reward space"

                # Override walltime in the generated state.
                state = self.env.state.copy()
                state.walltime = timer.time

                writer.write_state(state, flush=True)
                self.states.append(state)

                if not self.alive:
                    return
Example #4
def get_rewards(
    action: int,
    action_name: str,
    reward_space: str,
    num_trials: int,
    max_warmup_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials immediate rewards."""
    rewards, runtimes = [], []
    benchmark = benchmark_from_flags()
    num_attempts = 0
    while (
        num_attempts < max_attempts_multiplier * num_trials
        and len(rewards) < num_trials
    ):
        num_attempts += 1
        with env_from_flags(benchmark=benchmark) as env:
            env.observation_space = None
            env.reward_space = None
            env.reset(benchmark=benchmark)
            with Timer() as t:
                reward = run_one_trial(env, reward_space, action, max_warmup_steps)
            if reward is not None:
                rewards.append(reward)
                runtimes.append(t.time)

    return SensitivityAnalysisResult(
        name=action_name, runtimes=np.array(runtimes), rewards=np.array(rewards)
    )
Example #5
    def flush(self) -> None:
        """Flush the buffered steps and observations to database."""
        n_steps, n_observations = len(self.step_buffer), len(self.observations_buffer)

        # Nothing to flush.
        if not n_steps:
            return

        with Timer() as flush_time:
            # Housekeeping note: keep these statements in sync with record().
            self.cursor.executemany(
                "INSERT OR IGNORE INTO States VALUES (?, ?, ?, ?, ?)",
                self.step_buffer,
            )
            self.cursor.executemany(
                "INSERT OR IGNORE INTO Observations VALUES (?, ?, ?, ?, ?, ?)",
                ((k, *v) for k, v in self.observations_buffer.items()),
            )
            self.step_buffer = []
            self.observations_buffer = {}

            self.connection.commit()

        logging.info(
            "Wrote %d state records and %d observations in %s. Last flush %s ago",
            n_steps,
            n_observations,
            flush_time,
            humanize_duration(time() - self.last_commit),
        )
        self.last_commit = time()
Example #6
    def do_greedy(self, arg):
        """Do some greedy steps.
        All actions are tried and the one with the biggest positive reward is accepted.
        An argument, if given, should be the number of steps to take.
        The search will try to improve the default reward. Please call set_default_reward if needed.
        """
        if not self.env.reward_space:
            print("No default reward set. Call set_default_reward")
            return

        try:
            num_steps = max(1, int(arg))
        except ValueError:
            num_steps = 1

        with Timer() as timer:
            for i in range(num_steps):
                best = self.get_action_rewards()[0]
                if (not best.done) and (best.reward
                                        is not None) and (best.reward > 0):
                    self.env.step(best.action_index)
                    self.stack.append(best)
                    print(
                        f"Step: {i+1} Selected action: {best.action_name} Reward: {best.reward:.6f}"
                    )
                else:
                    print(f"Step: {i+1} Selected no action.")
                    if i + 1 < num_steps:
                        print("Greedy search stopping early.")
                    break

        print(f"Greedy {i+1} steps in {timer}")
Example #7
def validate_state(env: CompilerEnv,
                   state: CompilerEnvState) -> ValidationResult:
    """Validate a :class:`CompilerEnvState <compiler_gym.envs.CompilerEnvState>`.

    :param env: A compiler environment.
    :param state: The environment state to validate.
    :return: A :class:`ValidationResult <compiler_gym.ValidationResult>` instance.
    """
    error_messages = []
    validation = {
        "state": state,
        "actions_replay_failed": False,
        "reward_validated": False,
        "reward_validation_failed": False,
        "benchmark_semantics_validated": False,
        "benchmark_semantics_validation_failed": False,
    }

    if state.reward is not None and env.reward_space is None:
        raise ValueError("Reward space not specified")

    with Timer() as walltime:
        env.reset(benchmark=state.benchmark)
        # Use a while loop here so that we can `break` early out of the
        # validation process in case a step fails.
        while True:
            try:
                reward = _llvm_replay_commandline(env, state.commandline)
            except (ValueError, OSError) as e:
                validation["actions_replay_failed"] = True
                error_messages.append(str(e))
                break

            if state.reward is not None and env.reward_space.deterministic:
                validation["reward_validated"] = True
                # If reward deviates from the expected amount record the
                # error but continue with the remainder of the validation.
                if not math.isclose(
                        reward, state.reward, rel_tol=1e-5, abs_tol=1e-10):
                    validation["reward_validation_failed"] = True
                    error_messages.append(
                        f"Expected reward {state.reward:.4f} but received reward {reward:.4f}"
                    )

            validate_semantics = LLVM_BENCHMARK_VALIDATION_CALLBACKS.get(
                state.benchmark)
            if validate_semantics:
                validation["benchmark_semantics_validated"] = True
                semantics_error = validate_semantics(env)
                if semantics_error:
                    validation["benchmark_semantics_validation_failed"] = True
                    error_messages.append(semantics_error)

            # Finished all checks, break the loop.
            break

    return ValidationResult(walltime=walltime.time,
                            error_details="\n".join(error_messages),
                            **validation)
Example #8
def train(dataset, data_loader, model, num_epoch, device):
    optimizer = torch.optim.Adam(model.parameters())
    for epoch in range(num_epoch):
        with Timer(f"Epoch {epoch + 1} of {num_epoch} ({(epoch + 1) / num_epoch:.1%})"):
            dataset.set_distribution_type("train")
            dataset_looper(epoch, data_loader, model, device, optimizer)
            dataset.set_distribution_type("dev")
            dataset_looper(epoch, data_loader, model, device, train=False)
Example #9
    def do_back(self, arg):
        """Undo the last action, if any"""
        if self.stack:
            top = self.stack.pop()
            with Timer(f"Undid {top.action_name}"):
                self.rerun_stack()
        else:
            print("No actions to undo")
Example #10
def run_random_walk(env: CompilerEnv, step_count: int) -> List[float]:
    """Perform a random walk of the action space.

    :param env: The environment to use.
    :param step_count: The number of steps to run. This value is an upper bound -
        fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    :return: The list of observed rewards.
    """
    rewards = []

    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(f"\n=== Step {humanize.intcomma(step_num)} ===")
            print(f"Action:       {env.action_space.names[action_index]} "
                  f"(changed={not info.get('action_had_no_effect')})")
            rewards.append(reward)
            print(f"Reward:       {reward}")
            if env._eager_observation:
                print(f"Observation:\n{observation}")
            print(f"Step time:    {step_time}")
            if done:
                print("Episode ended by environment")
                break
        env.close()

    def reward_delta(reward):
        delta = rewards[0] / max(reward, 1e-9) - 1
        return emph(f"{'+' if delta >= 0 else ''}{delta:.2%}")

    print(
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec).")
    print(f"Init reward:  {rewards[0]}")
    print(f"Final reward: {rewards[-1]} ({reward_delta(rewards[-1])})")
    print(f"Max reward:   {max(rewards)} ({reward_delta(max(rewards))} "
          f"at step {humanize.intcomma(rewards.index(max(rewards)) + 1)})")
    return rewards
Example #11
def get_runtimes(op: Callable[[], Any], n: int):
    """Run `n` reptitions of function `op`, ignoring any errors."""
    runtimes = []
    for _ in range(n):
        try:
            with Timer() as timer:
                op()
            runtimes.append(timer.time)
        except Exception as e:  # pylint: disable=broad-except
            logger.warning("Op failed: %s", e)
    return runtimes
Example #12
    def get_step_times(env: CompilerEnv, num_steps: int, batched=False):
        while batched:
            # Run all actions in a single step().
            steps = [env.action_space.sample() for _ in range(num_steps)]
            with Timer() as timer:
                _, _, done, _ = env.step(steps)
            if not done:
                return [timer.time / num_steps] * num_steps
            env.reset()

        # Run each action as a step().
        runtimes = []
        while len(runtimes) < num_steps:
            with Timer() as timer:
                _, _, done, _ = env.step(env.action_space.sample())
            if done:
                env.reset()
            else:
                runtimes.append(timer.time)
        return runtimes
Example #13
def run_random_walk(env: CompilerEnv, step_count: int) -> None:
    """Perform a random walk of the action space.

    :param env: The environment to use.
    :param step_count: The number of steps to run. This value is an upper bound -
        fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    """
    rewards = []

    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(f"\n=== Step {humanize.intcomma(step_num)} ===\n"
                  f"Action:       {env.action_space.names[action_index]} "
                  f"(changed={not info.get('action_had_no_effect')})\n"
                  f"Reward:       {reward}")
            rewards.append(reward)
            if env.observation_space:
                print(f"Observation:\n{observation}")
            print(f"Step time:    {step_time}")
            if done:
                print("Episode ended by environment")
                break

    def reward_percentage(reward, rewards):
        if sum(rewards) == 0:
            return 0
        percentage = reward / sum(rewards)
        return emph(f"{'+' if percentage >= 0 else ''}{percentage:.2%}")

    print(
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec).\n"
        f"Total reward: {sum(rewards)}\n"
        f"Max reward:   {max(rewards)} ({reward_percentage(max(rewards), rewards)} "
        f"at step {humanize.intcomma(rewards.index(max(rewards)) + 1)})")
Example #14
def main(argv):
    """Main entry point."""
    argv = FLAGS(argv)
    if len(argv) != 1:
        raise app.UsageError(f"Unknown command line arguments: {argv[1:]}")

    with Timer("Initialized environment"):
        benchmark = benchmark_from_flags()
        env = env_from_flags(benchmark)

    shell = CompilerGymShell(env)
    shell.cmdloop()
Example #15
    def do_require_dataset(self, arg):
        """Require dataset
        The argument is the name of the dataset to require.
        """
        if self.get_datasets().count(arg):
            with Timer(f"Downloaded dataset {arg}"):
                require(self.env, arg)
            self.init_benchmarks()
            # FIXME CHRIS, why can't I get it to update the list of benchmarks?
            # I have to restart
            print("Application must be restarted to make changes visible.")
        else:
            print("Unknown dataset, '" + arg + "'")
            print("Available datasets are listed with command, list_available_datasets")
Example #16
    def do_hill_climb(self, arg):
        """Do some steps of hill climbing.
        A random action is taken, but only accepted if it has a positive reward.
        An argument, if given, should be the number of steps to take.
        The search will try to improve the default reward. Please call set_default_reward if needed.
        """
        if not self.env.benchmark:
            print("No benchmark set, please call the set_benchmark command")
            return

        if not self.env.reward_space:
            print("No default reward set. Call set_default_reward")
            return

        try:
            num_steps = max(1, int(arg))
        except ValueError:
            num_steps = 1

        num_accepted = 0
        cum_reward = 0
        with Timer() as timer:
            for i in range(num_steps):
                index = random.randrange(self.env.action_space.n)
                action = self.env.action_space.names[index]

                observation, reward, done, info = self.env.step(index)

                accept = not done and (reward is not None) and (reward > 0)
                if accept:
                    # Append the history element
                    hist = ActionHistoryElement(
                        action, index, observation, reward, done, info
                    )
                    self.stack.append(hist)
                    num_accepted += 1
                    cum_reward += reward
                else:
                    # Basically undo
                    self.rerun_stack()

                print(
                    f"Step: {i+1} Action: {action} Reward: {reward:.6f} Accept: {accept}"
                )
                if done:
                    print("Episode ended by environment: ", info["error_details"])
        print(
            f"Hill climb complete in {timer}. Accepted {num_accepted} of {num_steps} steps for total reward of {cum_reward}."
        )
Example #17
def main(argv):
    # Initialize a Q table.
    q_table: Dict[StateActionTuple, float] = {}
    benchmark = benchmark_from_flags()
    assert benchmark, "You must specify a benchmark using the --benchmark flag"

    with gym.make("llvm-ic-v0", benchmark=benchmark) as env:
        env.observation_space = "Autophase"

        # Train a Q-table.
        with Timer("Constructing Q-table"):
            train(q_table, env)

        # Rollout resulting policy.
        rollout(q_table, env, printout=True)
Example #18
    def do_reward(self, arg):
        """Show an reward value
        reward <name> - show the named reward
        The name should come from the list of rewards printed by the command list_rewards.
        Tab completion will be used if available.
        """
        if arg == "" and self.env.reward_space:
            arg = self.env.reward_space.id

        if self.rewards.count(arg):
            with Timer(f"Reward {arg}"):
                print(f"{self.env.reward[arg]:.6f}")
        else:
            print(f"Unknown reward, '{arg}'")
            print("Rewards are listed with command, list_rewards")
Example #19
    def do_set_default_reward(self, arg):
        """Set the default reward space
        set_default_reward <name> - set the named reward
        The name should come from the list of rewards printed by the command list_rewards.
        Tab completion will be used if available.
        With no argument it will set to None.
        This command will rerun the actions on the stack.
        """
        arg = arg.strip()
        if not arg or self.rewards.count(arg):
            with Timer(f"Reward {arg}"):
                self.env.reward_space = arg if arg else None
                self.rerun_stack(check_rewards=False)
        else:
            print("Unknown reward, '" + (arg if arg else "None") + "'")
            print("Rewards are listed with command, list_rewards")
Example #20
    def do_set_default_observation(self, arg):
        """Set the default observation space
        set_default_observation <name> - set the named observation
        The name should come from the list of observations printed by the command list_observations.
        Tab completion will be used if available.
        With no argument it will set to None.
        This command will rerun the actions on the stack.
        """
        arg = arg.strip()
        if not arg or self.observations.count(arg):
            with Timer() as timer:
                self.env.observation_space = arg if arg else None
                self.rerun_stack(check_rewards=False)
            print(f"Observation {arg} in {timer}")
        else:
            print("Unknown observation, '" + (arg if arg else "None") + "'")
            print("Observations are listed with command, list_observations")
Example #21
    def do_observation(self, arg):
        """Show an observation value
        observation <name> - show the named observation
        The name should come from the list of observations printed by the command list_observations.
        Tab completion will be used if available.
        """
        if arg == "" and self.env.observation_space:
            arg = self.env.observation_space_spec.id

        if self.observations.count(arg):
            with Timer() as timer:
                value = self.env.observation[arg]
            print(self.env.observation.spaces[arg].to_string(value))
            print(f"Observation {arg} in {timer}")
        else:
            print("Unknown observation, '" + arg + "'")
            print("Observations are listed with command, list_observations")
Example #22
def main(argv):
    """Main entry point."""
    argv = FLAGS(argv)
    if len(argv) != 1:
        raise app.UsageError(f"Unknown command line arguments: {argv[1:]}")

    if FLAGS.ls_benchmark:
        benchmark = benchmark_from_flags()
        env = env_from_flags(benchmark)
        print("\n".join(sorted(env.benchmarks)))
        env.close()
        return

    with Timer("Initialized environment"):
        benchmark = benchmark_from_flags()
        env = env_from_flags(benchmark)

    run_manual_env(env)
Example #23
    def do_try_all_actions(self, args):
        """Tries all actions from this position and reports the results in sorted order by reward"""
        if not self.env.reward_space:
            print("No default reward set. Call set_default_reward")
            return

        with Timer("Got actions"):
            items = self.get_action_rewards()

        def row(item):
            return (
                item.action_name,
                item.has_effect(),
                item.done,
                f"{item.reward:.6f}",
            )

        rows = [row(item) for item in items]
        headers = ["Action", "Effect", "Done", "Reward"]
        print(tabulate(rows, headers=headers, tablefmt="presto"))
Example #24
def main(argv):
    """Main entry point."""
    argv = FLAGS(argv)
    if len(argv) != 1:
        raise app.UsageError(f"Unknown command line arguments: {argv[1:]}")

    if FLAGS.ls_benchmark:
        benchmark = benchmark_from_flags()
        env = env_from_flags(benchmark)
        print("\n".join(sorted(env.benchmarks)))
        env.close()
        return

    with Timer("Initialized environment"):
        # FIXME Chris, I don't seem to actually get a benchmark
        benchmark = benchmark_from_flags()
        env = env_from_flags(benchmark)

    shell = CompilerGymShell(env)
    shell.cmdloop()
Example #25
    def __call__(self, env: CompilerEnv, seed: int = 0xCC) -> CompilerEnvState:
        """Autotune the given environment.

        :param env: The environment to autotune.

        :param seed: The random seed for the autotuner.

        :returns: A CompilerEnvState tuple describing the autotuning result.
        """
        # Run the autotuner in a temporary working directory and capture the
        # stdout/stderr.
        with tempfile.TemporaryDirectory(dir=transient_cache_path("."),
                                         prefix="autotune-") as tmpdir:
            with temporary_working_directory(Path(tmpdir)):
                with capture_output():
                    with Timer() as timer:
                        self.autotune(env, seed=seed, **self.autotune_kwargs)

        return CompilerEnvState(
            benchmark=env.benchmark.uri,
            commandline=env.commandline(),
            walltime=timer.time,
            reward=self.optimization_target.final_reward(env),
        )
Example #26
def run_manual_env(env: CompilerEnv):
    """Run an environment manually.

    The manual environment allows the user to step through the environment,
    selecting observations, rewards, and actions to run as they see fit. This is
    useful for debugging.

    :param env: The environment to run.
    """
    benchmark = None
    if not env.benchmark:
        # Allow the user to choose a benchmark, with the first choice being
        # to select randomly.
        benchmarks = sorted(env.benchmarks)
        if not benchmarks:
            print(
                "No benchmarks available see https://facebookresearch.github.io/CompilerGym/getting_started.html#installing-benchmarks"
            )
            print("Exiting...")
            env.close()
            return

        # Strip default benchmark:// protocol.
        for i, benchmark in enumerate(benchmarks):
            if benchmark.startswith("benchmark://"):
                benchmarks[i] = benchmark[len("benchmark://"):]

        benchmark_index = user_input.read_list_index("Benchmark",
                                                     ["random"] + benchmarks)
        if benchmark_index:
            benchmark = benchmarks[benchmark_index - 1]
        else:
            benchmark = None

    with Timer() as timer:
        eager_observation = env.reset(benchmark=benchmark)

    print(f"Reset {env.benchmark} environment in {timer}")
    if env.observation_space and eager_observation is not None:
        print(
            f"Observation: {env.observation_space.to_string(eager_observation)}"
        )

    observation_names = sorted(env.observation.spaces.keys())
    reward_names = sorted(env.reward.spaces.keys())
    last_eager_reward: Optional[float] = None
    step_count = 1

    while True:
        print(
            f"\nStep {step_count}. Select: [{emph('a')}]ction "
            f"[{emph('o')}]bservation [{emph('r')}]eward "
            f"[{emph('c')}]ommandline [{emph('e')}]nd >>> ",
            end="",
            flush=True,
        )
        while True:
            c = user_input.read_char()
            if c == "a":
                print("action", flush=True)
                index = user_input.read_list_index("Actions", ["random"] +
                                                   env.action_space.names)
                step_count += 1
                with Timer() as t:
                    if index == 0:
                        # User selected "random" action.
                        index = env.action_space.sample()
                    else:
                        # Offset to remove "random" action from index.
                        index -= 1
                    eager_observation, eager_reward, done, info = env.step(
                        index)

                # Print the eager observation, if available.
                if env.observation_space and eager_observation is not None:
                    print(
                        f"Observation: {env.observation_space.to_string(eager_observation)}"
                    )

                # Print the eager reward and the diff, if available.
                if env.reward_space and eager_reward is not None:
                    reward_diff = ""
                    if last_eager_reward is not None and eager_reward is not None:
                        reward_diff = (
                            f" (change: {eager_reward - last_eager_reward:.6f})"
                        )
                    print(f"Reward: {eager_reward:.6f}{reward_diff}")
                    last_eager_reward = eager_reward

                print(
                    f"Action {env.action_space.names[index]} in {t}.",
                    " No effect." if info.get("action_had_no_effect") else "",
                    flush=True,
                )
                if done:
                    print("Episode ended by environment: ",
                          info["error_details"])
                    env.close()
                    return
                break
            if c == "o":
                print("observation", flush=True)
                observation_name = user_input.read_list_value(
                    "Observable values", observation_names)
                with Timer() as timer:
                    value = env.observation[observation_name]
                print(
                    env.observation.spaces[observation_name].to_string(value))
                print(f"Observation {observation_name} in {timer}")
                break
            elif c == "r":
                print("reward", flush=True)
                reward_name = user_input.read_list_value(
                    "Rewards", reward_names)
                with Timer(f"Reward {reward_name}"):
                    print(f"{env.reward[reward_name]:.6f}")
                break
            elif c == "c":
                print("commandline")
                print("$", env.commandline(), flush=True)
                break
            elif c == "e":
                print("end", flush=True)
                with Timer("Closed environment"):
                    env.close()
                print("Have a nice day!")
                return
Example #27
    def do_reset(self, arg):
        """Clear the stack of any actions and reset"""
        self.stack.clear()
        with Timer("Reset"):
            self.env.reset()
        self.set_prompt()
Example #28
    def from_agent(cls,
                   env: CompilerEnv,
                   agent,
                   runtime: bool = True,
                   runtimes_count: int = 30):
        # We calculate our own reward at the end, no need for incremental
        # rewards during inference.
        env.reward_space = None

        # Run inference on the environment.
        observation, done = env.reset(), False
        with Timer() as inference_timer:
            while not done:
                action = agent.compute_action(observation)
                observation, _, done, _ = env.step(action)

        instruction_count_init = env.unwrapped.observation[
            "IrInstructionCountO0"]
        instruction_count_final = env.unwrapped.observation[
            "IrInstructionCount"]
        instruction_count_oz = env.unwrapped.observation[
            "IrInstructionCountOz"]

        object_size_init = env.unwrapped.observation["ObjectTextSizeO0"]
        object_size_final = env.unwrapped.observation["ObjectTextSizeBytes"]
        object_size_oz = env.unwrapped.observation["ObjectTextSizeOz"]

        runtimes_init = []
        runtimes_o3 = []
        runtimes_final = []

        try:
            if runtime and env.unwrapped.observation["IsRunnable"]:
                env.send_param("llvm.set_runtimes_per_observation_count",
                               str(runtimes_count))
                env.unwrapped.observation["Runtime"]  # warmup
                runtimes_final = env.unwrapped.observation["Runtime"].tolist()
                assert (len(runtimes_final) == runtimes_count
                        ), f"{len(runtimes_final)} != {runtimes_count}"

                env.reset()
                env.send_param("llvm.set_runtimes_per_observation_count",
                               str(runtimes_count))
                env.unwrapped.observation["Runtime"]  # warmup
                runtimes_init = env.unwrapped.observation["Runtime"].tolist()
                assert (len(runtimes_init) == runtimes_count
                        ), f"{len(runtimes_init)} != {runtimes_count}"

                env.send_param("llvm.apply_baseline_optimizations", "-O3")
                env.unwrapped.observation["Runtime"]  # warmup
                runtimes_o3 = env.unwrapped.observation["Runtime"].tolist()
                assert (len(runtimes_o3) == runtimes_count
                        ), f"{len(runtimes_o3)} != {runtimes_count}"
        except Exception as e:  # pylint: disable=broad-except
            logger.warning("Failed to compute runtime: %s", e)

        return cls(
            benchmark=env.benchmark.uri,
            inference_walltime_seconds=inference_timer.time,
            commandline=env.commandline(),
            episode_len=len(env.actions),
            instruction_count_init=instruction_count_init,
            instruction_count_final=instruction_count_final,
            instruction_count_oz=instruction_count_oz,
            instruction_count_reduction=instruction_count_oz /
            max(instruction_count_final, 1),
            object_size_init=object_size_init,
            object_size_final=object_size_final,
            object_size_oz=object_size_oz,
            object_size_reduction=object_size_oz / max(object_size_final, 1),
            runtimes_init=runtimes_init,
            runtimes_final=runtimes_final,
            runtimes_o3=runtimes_o3,
            runtime_reduction=np.median(runtimes_o3 or [0]) /
            max(np.median(runtimes_final or [0]), 1),
        )
Example #29
def main(argv):
    assert len(argv) == 1, f"Unknown arguments: {argv[1:]}"

    with open(FLAGS.logfile, "w") as f:
        print(
            "nproc",
            "episodes_per_worker",
            "steps_per_episode",
            "total_episodes",
            "thread_steps_per_second",
            "process_steps_per_second",
            "thread_walltime",
            "process_walltime",
            sep=",",
            file=f,
        )

        for nproc in [1] + list(
                range(FLAGS.nproc_increment, FLAGS.max_nproc + 1,
                      FLAGS.nproc_increment)):
            # Perform the same `nproc * num_episodes` random trajectories first
            # using threads, then using processes.
            threads = [
                Thread(
                    target=run_random_search,
                    args=(FLAGS.num_episodes, FLAGS.num_steps),
                ) for _ in range(nproc)
            ]
            with Timer(f"Run {nproc} threaded workers") as thread_time:
                for thread in threads:
                    thread.start()
                for thread in threads:
                    thread.join()

            processes = [
                Process(
                    target=run_random_search,
                    args=(FLAGS.num_episodes, FLAGS.num_steps),
                ) for _ in range(nproc)
            ]
            with Timer(f"Run {nproc} process workers") as process_time:
                for process in processes:
                    process.start()
                for process in processes:
                    process.join()

            print(
                nproc,
                FLAGS.num_episodes,
                FLAGS.num_steps,
                FLAGS.num_episodes * nproc,
                (FLAGS.num_episodes * FLAGS.num_steps * nproc) /
                thread_time.time,
                (FLAGS.num_episodes * FLAGS.num_steps * nproc) /
                process_time.time,
                thread_time.time,
                process_time.time,
                sep=",",
                file=f,
                flush=True,
            )
Example #30
    def main(argv):
        assert len(argv) == 1, f"Unknown args: {argv[:1]}"
        assert FLAGS.n > 0, "n must be > 0"

        with gym.make("llvm-ic-v0") as env:

            # Stream verbose CompilerGym logs to file.
            logger = logging.getLogger("compiler_gym")
            logger.setLevel(logging.DEBUG)
            log_handler = logging.FileHandler(FLAGS.leaderboard_logfile)
            logger.addHandler(log_handler)
            logger.propagate = False

            print(f"Writing results to {FLAGS.leaderboard_results}")
            print(f"Writing logs to {FLAGS.leaderboard_logfile}")

            # Build the list of benchmarks to evaluate.
            benchmarks = env.datasets[FLAGS.test_dataset].benchmark_uris()
            if FLAGS.max_benchmarks:
                benchmarks = islice(benchmarks, FLAGS.max_benchmarks)
            benchmarks = list(benchmarks)

            # Repeat the searches for the requested number of iterations.
            benchmarks *= FLAGS.n
            total_count = len(benchmarks)

            # If we are resuming from a previous job, read the states that have
            # already been processed and remove those benchmarks from the list
            # of benchmarks to evaluate.
            init_states = []
            if FLAGS.resume and Path(FLAGS.leaderboard_results).is_file():
                with CompilerEnvStateReader(open(
                        FLAGS.leaderboard_results)) as reader:
                    for state in reader:
                        init_states.append(state)
                        if state.benchmark in benchmarks:
                            benchmarks.remove(state.benchmark)

            # Run the benchmark loop in background so that we can asynchronously
            # log progress.
            worker = _EvalPolicyWorker(env, benchmarks, policy, init_states)
            worker.start()
            timer = Timer().reset()
            try:
                print(f"=== Evaluating policy on "
                      f"{humanize.intcomma(total_count)} "
                      f"{FLAGS.test_dataset} benchmarks ==="
                      "\n\n"  # Blank lines will be filled below
                      )
                while worker.is_alive():
                    done_count = len(worker.states)
                    remaining_count = total_count - done_count
                    time = timer.time
                    gmean_reward = geometric_mean(
                        [s.reward for s in worker.states])
                    mean_walltime = (arithmetic_mean(
                        [s.walltime for s in worker.states]) or time)
                    print(
                        "\r\033[2A"
                        "\033[K"
                        f"Runtime: {humanize_duration_hms(time)}. "
                        f"Estimated completion: {humanize_duration_hms(mean_walltime * remaining_count)}. "
                        f"Completed: {humanize.intcomma(done_count)} / {humanize.intcomma(total_count)} "
                        f"({done_count / total_count:.1%})."
                        "\n\033[K"
                        f"Current mean walltime: {mean_walltime:.3f}s / benchmark."
                        "\n\033[K"
                        f"Current geomean reward: {gmean_reward:.4f}.",
                        flush=True,
                        end="",
                    )
                    sleep(1)
            except KeyboardInterrupt:
                print("\nkeyboard interrupt", flush=True)
                worker.alive = False
                # User interrupt, don't validate.
                FLAGS.validate = False

        if FLAGS.validate:
            FLAGS.env = "llvm-ic-v0"
            validate(["argv0", FLAGS.leaderboard_results])