Example #1
def test_service_env_dies_reset(env: CompilerEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")

    # Kill the service. Note killing the service for a ManagedConnection will
    # result in a ServiceError because we have not ended the session we started
    # with env.reset() above. For UnmanagedConnection, this error will not be
    # raised.
    try:
        env.service.close()
    except ServiceError as e:
        assert "Service exited with returncode " in str(e)

    # Check that the environment doesn't fall over.
    observation, reward, done, info = env.step(0)
    assert done, info["error_details"]
    assert not env.in_episode

    # Check that default values are returned.
    np.testing.assert_array_equal(observation, np.zeros(AUTOPHASE_FEATURE_DIM))
    assert reward == 0

    # Reset the environment and check that it works.
    env.reset(benchmark="cbench-v1/crc32")
    assert env.in_episode

    observation, reward, done, info = env.step(0)
    assert not done, info["error_details"]
    assert observation is not None
    assert reward is not None
Example #2
def test_service_env_dies_reset(env: CompilerEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cBench-v0/crc32")

    # Kill the service.
    env.service.close()

    # Check that the environment doesn't fall over.
    observation, reward, done, info = env.step(0)
    assert done, info["error_details"]
    assert not env.in_episode

    # Check that default values are returned.
    np.testing.assert_array_equal(observation, np.zeros(AUTOPHASE_FEATURE_DIM))
    assert reward == 0

    # Reset the environment and check that it works.
    env.reset(benchmark="cBench-v0/crc32")
    assert env.in_episode

    observation, reward, done, info = env.step(0)
    assert not done, info["error_details"]
    assert observation is not None
    assert reward is not None
Example #3
def test_double_reset(env: CompilerEnv):
    """Test that reset() can be called twice."""
    env.reset()
    assert env.in_episode
    env.step(env.action_space.sample())
    env.reset()
    env.step(env.action_space.sample())
    assert env.in_episode
Example #4
def test_double_reset_with_step(env: CompilerEnv):
    """Test that reset() can be called twice with a step."""
    env.reset()
    assert env.in_episode
    _, _, done, info = env.step(env.action_space.sample())
    assert not done, info
    env.reset()
    _, _, done, info = env.step(env.action_space.sample())
    assert not done, info
    assert env.in_episode
Example #5
def test_fork(env: CompilerEnv):
    env.reset()
    env.step(0)
    env.step(1)
    other_env = env.fork()
    try:
        assert env.benchmark == other_env.benchmark
        assert other_env.actions == [0, 1]
    finally:
        other_env.close()
Example #6
def nevergrad(env: CompilerEnv,
              optimization_target: OptimizationTarget,
              search_time_seconds: int,
              seed: int,
              episode_length: int = 100,
              optimizer: str = "DiscreteLenglerOnePlusOne",
              **kwargs) -> None:
    """Optimize an environment using nevergrad.

    Nevergrad is a gradient-free optimization platform that provides
    implementations of various black-box optimization techniques:

        https://facebookresearch.github.io/nevergrad/
    """
    if optimization_target == OptimizationTarget.RUNTIME:

        def calculate_negative_reward(actions: Tuple[int]) -> float:
            env.reset()
            env.step(actions)
            return -env.episode_reward

    else:
        # Only cache the deterministic non-runtime rewards.
        @lru_cache(maxsize=int(1e4))
        def calculate_negative_reward(actions: Tuple[int]) -> float:
            env.reset()
            env.step(actions)
            return -env.episode_reward

    params = ng.p.Choice(
        choices=range(env.action_space.n),
        repetitions=episode_length,
        deterministic=True,
    )
    params.random_state.seed(seed)

    optimizer_class = getattr(ng.optimizers, optimizer)
    optimizer = optimizer_class(parametrization=params,
                                budget=1,
                                num_workers=1)

    end_time = time() + search_time_seconds
    while time() < end_time:
        x = optimizer.ask()
        optimizer.tell(x, calculate_negative_reward(x.value))

    # Get best solution and replay it.
    recommendation = optimizer.provide_recommendation()
    env.reset()
    env.step(recommendation.value)
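The function above drives nevergrad purely through its ask/tell interface. A minimal standalone sketch of that loop on a toy objective, assuming only that the nevergrad package is installed (the optimizer choice and budget here are illustrative):

import nevergrad as ng


def loss(x: float) -> float:
    # Toy objective, minimized at x = 3.
    return (x - 3.0) ** 2


params = ng.p.Scalar(init=0.0)
optimizer = ng.optimizers.OnePlusOne(parametrization=params, budget=50)

for _ in range(optimizer.budget):
    candidate = optimizer.ask()  # Propose a parameter value.
    optimizer.tell(candidate, loss(candidate.value))  # Report its loss.

recommendation = optimizer.provide_recommendation()
print(recommendation.value)  # Best value found, approximately 3.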
Example #7
def test_eager_reward(env: CompilerEnv):
    env.eager_reward_space = "codesize"
    env.reset()
    observation, reward, done, info = env.step(0)
    assert observation is None
    assert reward == 0
    assert not done
Example #8
    def run_one_episode(self, env: CompilerEnv) -> bool:
        """Run a single random episode.

        :param env: An environment.
        :return: True if the episode ended gracefully, else False.
        """
        observation = env.reset()
        actions: List[int] = []
        patience = self._patience
        total_returns = 0
        while patience >= 0:
            patience -= 1
            self.total_step_count += 1
            # === Your agent here! ===
            action_index = env.action_space.sample()
            # === End of agent. ===
            actions.append(action_index)
            observation, reward, done, _ = env.step(action_index)
            if done:
                return False
            total_returns += reward
            if total_returns > self.best_returns:
                patience = self._patience
                self.best_returns = total_returns
                self.best_actions = actions.copy()
                self.best_commandline = env.commandline()
                self.best_found_at_time = time()

        return True
Example #9
def run_one_trial(
    env: CompilerEnv, reward_space: str, action: int, max_warmup_steps: int
) -> Optional[float]:
    """Run a random number of "warmup" steps in an environment, then compute
    the immediate reward of the given action.

    :return: An immediate reward.
    """
    num_warmup_steps = random.randint(0, max_warmup_steps)
    warmup_actions = [env.action_space.sample() for _ in range(num_warmup_steps)]
    env.reward_space = reward_space
    _, _, done, _ = env.step(warmup_actions)
    if done:
        return None
    _, (reward,), done, _ = env.step(action, rewards=[reward_space])
    return None if done else reward
Example #10
def eval_action(fkd: CompilerEnv, action: int) -> RewardAction:
    """Evaluate the given action."""
    try:
        _, reward, _, _ = fkd.step(action)
    finally:
        fkd.close()
    return RewardAction(reward=reward, action=action)
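Here fkd is presumably a forked copy of a parent environment (see the fork() example above), so stepping and closing it leaves the parent episode untouched. A hedged sketch of how such a helper might be driven, assuming env is an already-reset CompilerEnv and RewardAction is the (reward, action) tuple type used above:

# Evaluate every action on its own fork and keep the highest-reward result.
best = max(
    (eval_action(env.fork(), action) for action in range(env.action_space.n)),
    key=lambda result: result.reward,
)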
Example #11
def test_default_reward(env: CompilerEnv):
    """Test default reward space."""
    env.reward_space = "runtime"
    env.reset()
    observation, reward, done, info = env.step(0)
    assert observation is None
    assert reward == 0
    assert not done
Example #12
def test_eager_ir_observation(env: CompilerEnv):
    env.eager_observation_space = "ir"
    observation = env.reset()
    assert observation == "Hello, world!"

    observation, reward, done, info = env.step(0)
    assert observation == "Hello, world!"
    assert reward is None
    assert not done
Example #13
def test_default_ir_observation(env: CompilerEnv):
    """Test default observation space."""
    env.observation_space = "ir"
    observation = env.reset()
    assert len(observation) > 0

    observation, reward, done, info = env.step(0)
    assert not done, info
    assert len(observation) > 0
    assert reward is None
Example #14
def test_default_ir_observation(env: CompilerEnv):
    """Test default observation space."""
    env.observation_space = "ir"
    observation = env.reset()
    assert observation == "Hello, world!"

    observation, reward, done, info = env.step(0)
    assert observation == "Hello, world!"
    assert reward is None
    assert not done
Example #15
def test_service_env_dies_reset(env: CompilerEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cBench-v0/crc32")

    # Kill the service.
    env.service.close()

    # Check that the environment doesn't fall over.
    observation, reward, done, _ = env.step(0)
    assert done
    assert observation is None
    assert reward is None

    # Reset the environment and check that it works.
    env.reset(benchmark="cBench-v0/crc32")
    observation, reward, done, _ = env.step(0)
    assert not done
    assert observation is not None
    assert reward is not None
Example #16
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cBench-v0/crc32")
    observation, reward, done, _ = env.step(
        env.action_space.from_string(action_name))

    assert isinstance(observation, np.ndarray)
    assert observation.shape == (AUTOPHASE_FEATURE_DIM, )
    assert isinstance(reward, float)
    assert isinstance(done, bool)
Example #17
def run_one_trial(env: CompilerEnv, reward_space: str, action: int,
                  max_warmup_steps: int) -> Optional[float]:
    """Run a random number of "warmup" steps in an environment, then compute
    the reward delta of the given action.

        :return: The ratio of reward improvement.
    """
    num_warmup_steps = random.randint(0, max_warmup_steps)
    for _ in range(num_warmup_steps):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            return None
    # Force reward calculation.
    init_reward = env.reward[reward_space]
    assert init_reward is not None
    _, _, done, _ = env.step(action)
    if done:
        return None
    reward_after = env.reward[reward_space]
    assert reward_after is not None
    return reward_after
Example #18
def apply_random_trajectory(
    env: CompilerEnv, random_trajectory_length_range=(1, 50)
) -> List[Tuple[int, observation_t, float, bool]]:
    """Evaluate and return a random trajectory."""
    num_actions = random.randint(*random_trajectory_length_range)
    trajectory = []
    for _ in range(num_actions):
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)
        if done:
            break  # Broken trajectory.
        trajectory.append((action, observation, reward, done))

    return trajectory
Example #19
def run_one_trial(
    env: CompilerEnv, reward_space: str, min_steps: int, max_steps: int
) -> Optional[float]:
    """Run a random number of random steps in an environment and return the
    cumulative reward.

    :return: A cumulative reward.
    """
    num_steps = random.randint(min_steps, max_steps)
    warmup_actions = [env.action_space.sample() for _ in range(num_steps)]
    env.reward_space = reward_space
    _, _, done, _ = env.step(warmup_actions)
    if done:
        return None
    return env.episode_reward
Example #20
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.eager_reward_space = "IrInstructionCount"
    env.eager_observation_space = "Autophase"
    env.reset(benchmark="cBench-v0/crc32")
    observation, reward, done, info = env.step(
        env.action_space.from_string(action_name)
    )

    if done:
        assert observation is None
        assert reward is None
    else:
        assert isinstance(observation, np.ndarray)
        assert observation.shape == (56,)
        assert reward < 0
Example #21
def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path):
    logs_path = outdir / logs.BEST_ACTIONS_PROGRESS_NAME
    start_time = time()

    if config.enable_llvm_env:
        if isinstance(env, LlvmEnv):
            env.write_bitcode(outdir / "unoptimized.bc")

    with open(str(logs_path), "w") as f:
        ep_reward = 0
        for i, action in enumerate(action_names, start=1):
            _, reward, done, _ = env.step(env.action_space.names.index(action))
            assert not done
            ep_reward += reward
            print(
                f"Step [{i:03d} / {len(action_names):03d}]: reward={reward:.4f}   \t"
                f"episode={ep_reward:.4f}   \taction={action}")
            progress = RandomSearchProgressLogEntry(
                runtime_seconds=time() - start_time,
                total_episode_count=1,
                total_step_count=i,
                num_passes=i,
                reward=reward,
            )
            print(progress.to_csv(), action, file=f, sep=",")

    if config.enable_llvm_env:
        if isinstance(env, LlvmEnv):
            env.write_bitcode(outdir / "optimized.bc")
            print(
                tabulate(
                    [
                        (
                            "IR instruction count",
                            env.observation["IrInstructionCountO0"],
                            env.observation["IrInstructionCountOz"],
                            env.observation["IrInstructionCount"],
                        ),
                        (
                            "Object .text size (bytes)",
                            env.observation["ObjectTextSizeO0"],
                            env.observation["ObjectTextSizeOz"],
                            env.observation["ObjectTextSizeBytes"],
                        ),
                    ],
                    headers=("", "-O0", "-Oz", "final"),
                ))
Example #22
def run_one_trial(env: CompilerEnv, reward: str, min_steps: int,
                  max_steps: int) -> Optional[float]:
    """Run a random number of "warmup" steps in an environment, then compute
    the reward delta of the given action.

        :return: The ratio of reward improvement.
    """
    num_steps = random.randint(min_steps, max_steps)
    init_reward = env.reward[reward]
    for _ in range(num_steps):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            return None
    reward_after = env.reward[reward]
    assert init_reward is not None
    assert reward_after is not None
    return (reward_after - init_reward) / init_reward
Example #23
def run_random_walk(env: CompilerEnv, step_count: int) -> List[float]:
    """Perform a random walk of the action space.

    :param env: The environment to use.
    :param step_count: The number of steps to run. This value is an upper bound -
        fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    :return: The list of observed rewards.
    """
    rewards = []

    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(f"\n=== Step {humanize.intcomma(step_num)} ===")
            print(f"Action:       {env.action_space.names[action_index]} "
                  f"(changed={not info.get('action_had_no_effect')})")
            rewards.append(reward)
            print(f"Reward:       {reward}")
            if env._eager_observation:
                print(f"Observation:\n{observation}")
            print(f"Step time:    {step_time}")
            if done:
                print("Episode ended by environment")
                break
        env.close()

    def reward_delta(reward):
        delta = rewards[0] / max(reward, 1e-9) - 1
        return emph(f"{'+' if delta >= 0 else ''}{delta:.2%}")

    print(
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec).")
    print(f"Init reward:  {rewards[0]}")
    print(f"Final reward: {rewards[-1]} ({reward_delta(rewards[-1])})")
    print(f"Max reward:   {max(rewards)} ({reward_delta(max(rewards))} "
          f"at step {humanize.intcomma(rewards.index(max(rewards)) + 1)})")
    return rewards
Example #24
def apply_random_trajectory(
    env: CompilerEnv,
    random_trajectory_length_range=(1, 50),
    timeout: int = 0,
) -> List[Tuple[int, ObservationType, float, bool]]:
    """Evaluate and return a random trajectory."""
    end_time = time() + timeout
    num_actions = random.randint(*random_trajectory_length_range)
    trajectory = []
    for _ in range(num_actions):
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)
        if done:
            break  # Broken trajectory.
        trajectory.append((action, observation, reward, done))
        if timeout and time() > end_time:
            break

    return trajectory
Example #25
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cbench-v1/crc32")
    observation, reward, done, _ = env.step(
        env.action_space.from_string(action_name))

    assert isinstance(observation, np.ndarray)
    assert observation.shape == (AUTOPHASE_FEATURE_DIM, )
    assert isinstance(reward, float)
    assert isinstance(done, bool)

    try:
        env.close()
    except ServiceError as e:
        # env.close() will raise an error if the service terminated
        # ungracefully. In that case, the "done" flag should have been set.
        assert done, f"Service error was raised when 'done' flag not set: {e}"
Example #26
def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path):
    logs_path = outdir / logs.BEST_ACTIONS_PROGRESS_NAME
    start_time = time()
    init_reward = env.reward[env.eager_reward_space]

    print(
        f"Step [{0:03d} / {len(action_names):03d}]: reward={init_reward:.4f}")

    with open(str(logs_path), "w") as f:
        progress = logs.ProgressLogEntry(
            runtime_seconds=time() - start_time,
            total_episode_count=1,
            total_step_count=0,
            num_passes=0,
            reward=init_reward,
        )
        print(progress.to_csv(), "", file=f, sep="")

        previous_reward = init_reward
        for i, action in enumerate(action_names, start=1):
            _, reward, done, _ = env.step(env.action_space.names.index(action))
            assert not done
            print(
                f"Step [{i:03d} / {len(action_names):03d}]: reward={reward:.4f}, "
                f"change={reward-previous_reward:.4f}, action={action}")
            progress = logs.ProgressLogEntry(
                runtime_seconds=time() - start_time,
                total_episode_count=1,
                total_step_count=i,
                num_passes=i,
                reward=reward,
            )
            print(progress.to_csv(), action, file=f, sep=",")
            previous_reward = reward

    if isinstance(env, LlvmEnv):
        bitcode_path = outdir / logs.OPTIMIZED_BITCODE
        # Write optimized bitcode to file.
        temppath = env.observation["BitcodeFile"]
        # Copy, don't rename, since rename will fail if the paths are on
        # different devices.
        shutil.copyfile(temppath, str(bitcode_path))
        os.remove(temppath)
Example #27
def run_random_walk(env: CompilerEnv, step_count: int) -> None:
    """Perform a random walk of the action space.

    :param env: The environment to use.
    :param step_count: The number of steps to run. This value is an upper bound -
        fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    """
    rewards = []

    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(f"\n=== Step {humanize.intcomma(step_num)} ===\n"
                  f"Action:       {env.action_space.names[action_index]} "
                  f"(changed={not info.get('action_had_no_effect')})\n"
                  f"Reward:       {reward}")
            rewards.append(reward)
            if env.observation_space:
                print(f"Observation:\n{observation}")
            print(f"Step time:    {step_time}")
            if done:
                print("Episode ended by environment")
                break

    def reward_percentage(reward, rewards):
        if sum(rewards) == 0:
            return 0
        percentage = reward / sum(rewards)
        return emph(f"{'+' if percentage >= 0 else ''}{percentage:.2%}")

    print(
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec).\n"
        f"Total reward: {sum(rewards)}\n"
        f"Max reward:   {max(rewards)} ({reward_percentage(max(rewards), rewards)} "
        f"at step {humanize.intcomma(rewards.index(max(rewards)) + 1)})")
Example #28
def run_manual_env(env: CompilerEnv):
    """Run an environment manually.

    The manual environment allows the user to step through the environment,
    selecting observations, rewards, and actions to run as they see fit. This is
    useful for debugging.

    :param env: The environment to run.
    """
    benchmark = None
    if not env.benchmark:
        # Allow the user to choose a benchmark, with the first choice being
        # to select randomly.
        benchmarks = sorted(env.benchmarks)
        if not benchmarks:
            print(
                "No benchmarks available see https://facebookresearch.github.io/CompilerGym/getting_started.html#installing-benchmarks"
            )
            print("Exiting...")
            env.close()
            return

        # Strip default benchmark:// protocol.
        for i, benchmark in enumerate(benchmarks):
            if benchmark.startswith("benchmark://"):
                benchmarks[i] = benchmark[len("benchmark://"):]

        benchmark_index = user_input.read_list_index("Benchmark",
                                                     ["random"] + benchmarks)
        if benchmark_index:
            benchmark = benchmarks[benchmark_index - 1]
        else:
            benchmark = None

    with Timer() as timer:
        eager_observation = env.reset(benchmark=benchmark)

    print(f"Reset {env.benchmark} environment in {timer}")
    if env.observation_space and eager_observation is not None:
        print(
            f"Observation: {env.observation_space.to_string(eager_observation)}"
        )

    observation_names = sorted(env.observation.spaces.keys())
    reward_names = sorted(env.reward.spaces.keys())
    last_eager_reward: Optional[float] = None
    step_count = 1

    while True:
        print(
            f"\nStep {step_count}. Select: [{emph('a')}]ction "
            f"[{emph('o')}]bservation [{emph('r')}]eward "
            f"[{emph('c')}]ommandline [{emph('e')}]nd >>> ",
            end="",
            flush=True,
        )
        while True:
            c = user_input.read_char()
            if c == "a":
                print("action", flush=True)
                index = user_input.read_list_index("Actions", ["random"] +
                                                   env.action_space.names)
                step_count += 1
                with Timer() as t:
                    if index == 0:
                        # User selected "random" action.
                        index = env.action_space.sample()
                    else:
                        # Offset to remove "random" action from index.
                        index -= 1
                    eager_observation, eager_reward, done, info = env.step(
                        index)

                # Print the eager observation, if available.
                if env.observation_space and eager_observation is not None:
                    print(
                        f"Observation: {env.observation_space.to_string(eager_observation)}"
                    )

                # Print the eager reward and the diff, if available.
                if env.reward_space and eager_reward is not None:
                    reward_diff = ""
                    if last_eager_reward is not None and eager_reward is not None:
                        reward_diff = (
                            f" (change: {eager_reward - last_eager_reward:.6f})"
                        )
                    print(f"Reward: {eager_reward:.6f}{reward_diff}")
                    last_eager_reward = eager_reward

                print(
                    f"Action {env.action_space.names[index]} in {t}.",
                    " No effect." if info.get("action_had_no_effect") else "",
                    flush=True,
                )
                if done:
                    print("Episode ended by environment: ",
                          info["error_details"])
                    env.close()
                    return
                break
            if c == "o":
                print("observation", flush=True)
                observation_name = user_input.read_list_value(
                    "Observable values", observation_names)
                with Timer() as timer:
                    value = env.observation[observation_name]
                print(
                    env.observation.spaces[observation_name].to_string(value))
                print(f"Observation {observation_name} in {timer}")
                break
            elif c == "r":
                print("reward", flush=True)
                reward_name = user_input.read_list_value(
                    "Rewards", reward_names)
                with Timer(f"Reward {reward_name}"):
                    print(f"{env.reward[reward_name]:.6f}")
                break
            elif c == "c":
                print("commandline")
                print("$", env.commandline(), flush=True)
                break
            elif c == "e":
                print("end", flush=True)
                with Timer("Closed environment"):
                    env.close()
                print("Have a nice day!")
                return
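For completeness, a minimal sketch of launching this interactive session, assuming the compiler_gym package provides the "llvm-v0" environment (the name is illustrative; run_manual_env closes the environment itself when the session ends):

import compiler_gym

env = compiler_gym.make("llvm-v0")
run_manual_env(env)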
Example #29
def test_Step_out_of_range(env: CompilerEnv):
    """Test error handling with an invalid action."""
    env.reset()
    with pytest.raises(ValueError) as ctx:
        env.step(100)
    assert str(ctx.value) == "Out-of-range"
Example #30
def test_step_before_reset(env: CompilerEnv):
    """Taking a step() before reset() is illegal."""
    with pytest.raises(SessionNotFound,
                       match=r"Must call reset\(\) before step\(\)"):
        env.step(0)