Example #1
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import NamedTuple

from compiler_gym.envs import CompilerEnv


class RewardAction(NamedTuple):
    # Assumed definition, not part of the original excerpt: field order
    # (reward, action) so that tuple comparison ranks candidates by reward
    # first, then by action index as a tie-breaker.
    reward: float
    action: int


def select_best_action(env: CompilerEnv,
                       executor: ThreadPoolExecutor) -> RewardAction:
    """Determine the best action by trying all possible options and ranking them."""
    def eval_action(fkd: CompilerEnv, action: int) -> RewardAction:
        """Evaluate the given action."""
        try:
            _, reward, _, _ = fkd.step(action)
        finally:
            fkd.close()
        return RewardAction(reward=reward, action=action)

    # Select the best action using the reward that the action produces, then
    # action index as a tie-breaker. Do this by creating n forks of the
    # environment, one for every action, and evaluating the actions in parallel
    # threads. Note that calls to fork() occur in the main thread for thread
    # safety in case of environment restart.
    futures = (executor.submit(eval_action, env.fork(), action)
               for action in range(env.action_space.n))
    best_reward_action = RewardAction(reward=-float("inf"), action=0)
    for future in as_completed(futures):
        reward_action: RewardAction = future.result()
        if reward_action > best_reward_action:
            best_reward_action = reward_action

    return best_reward_action
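
For context, here is one way select_best_action might be driven. This is a minimal sketch, not part of the original example: the greedy loop, the num_steps budget, and the reward <= 0 stopping rule are all assumptions.

import os
from concurrent.futures import ThreadPoolExecutor


def greedy_search(env: CompilerEnv, num_steps: int = 10) -> float:
    """Hypothetical driver: repeatedly take the single best action."""
    env.reset()
    total_reward = 0.0
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        for _ in range(num_steps):
            best = select_best_action(env, executor)
            if best.reward <= 0:
                break  # No single action improves the current state.
            _, reward, done, _ = env.step(best.action)
            total_reward += reward
            if done:
                break
    return total_reward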
Example #2
def test_fork(env: CompilerEnv):
    """Check that fork() copies the benchmark and the action history."""
    env.reset()
    env.step(0)
    env.step(1)
    other_env = env.fork()
    try:
        assert env.benchmark == other_env.benchmark
        assert other_env.actions == [0, 1]
    finally:
        other_env.close()
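
A natural companion check, sketched here as an assumption about fork() semantics rather than taken from the original suite: stepping a fork should not affect the parent environment.

def test_fork_is_independent(env: CompilerEnv):
    """Hypothetical test: a fork diverges without mutating its parent."""
    env.reset()
    with env.fork() as fkd:
        fkd.step(0)
        assert fkd.actions == [0]  # The fork records its own action...
        assert env.actions == []   # ...while the parent history stays empty.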
Example #3
def hill_climb(env: CompilerEnv):
    """Hill climbing: randomly perturb the current choices, keep improvements."""
    best = objective(env)
    for _ in range(FLAGS.gcc_search_budget):
        with env.fork() as fkd:
            # Nudge each option index by up to ±5 steps within its legal range.
            fkd.choices = [
                random.randint(max(-1, x - 5),
                               min(len(env.gcc_spec.options[i]) - 1, x + 5))
                for i, x in enumerate(env.choices)
            ]
            cost = objective(fkd)
            if cost < best:
                best = cost
                env.choices = fkd.choices
    return best
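
The search relies on an objective() function and an absl FLAGS.gcc_search_budget flag defined elsewhere in the source file. A minimal sketch of what they might look like; the flag default and the "obj_size" observation name are assumptions:

import random

from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_integer("gcc_search_budget", 100,
                     "Number of random perturbations to evaluate.")


def objective(env: CompilerEnv) -> int:
    # Hypothetical objective: minimize the compiled object size.
    # "obj_size" is an assumed observation name; substitute your metric.
    return env.observation["obj_size"]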
Example #4
def test_fork(benchmark, env: CompilerEnv, benchmark_name):
    """Time fork() + close() using the pytest-benchmark fixture."""
    env.reset(benchmark_name)
    benchmark(lambda: env.fork().close())
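
Here benchmark is the pytest-benchmark fixture, which calls the lambda repeatedly and reports timing statistics. If the plugin is unavailable, a rough stdlib equivalent might be:

import timeit


def time_fork(env: CompilerEnv, repeats: int = 100) -> float:
    """Average fork() + close() latency in seconds over `repeats` runs."""
    return timeit.timeit(lambda: env.fork().close(), number=repeats) / repeats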