def test_fuzz(env: LlvmEnv):
    """Repeat one randomly sampled action from an identical starting state and
    verify that the generated LLVM-IR never differs between runs.

    Limitations of this test:

        * The starting state is always an unoptimized benchmark, so a pass
          whose effect depends on earlier passes is not exercised.

        * The action is repeated a fixed number of times; vanishingly rare
          non-determinism can still slip through undetected.
    """
    action = env.action_space.sample()
    action_name = env.action_space.names[action]
    benchmark = random.choice(BENCHMARK_NAMES)
    env.observation_space = "Ir"

    digests = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        # Reset to the same benchmark and record the pre-action IR digest.
        initial_ir = env.reset(benchmark=benchmark)
        checksum_before = sha1(initial_ir)

        ir, _, done, _ = env.step(action)
        assert not done
        digests.add(sha1(ir))
        if len(digests) != 1:
            pytest.fail(f"Repeating the {action_name} action {i} times on "
                        f"{benchmark} produced different states")

        # An action which has no effect is not likely to be nondeterministic.
        if next(iter(digests)) == checksum_before:
            break
def test_fuzz(env: LlvmEnv, reward_space: str):
    """Build a random trajectory on a csmith-generated program, reset the
    environment, then replay every step and verify that the replay reproduces
    the same observations, rewards, and episode termination.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space

    benchmark = env.datasets["generator://csmith-v0"].random_benchmark()
    print(benchmark.uri)  # For debugging in case of failure.

    try:
        env.reset(benchmark=benchmark)
    except BenchmarkInitError:
        # Generated program was rejected by the environment; nothing to test.
        return

    trajectory = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE
    )
    print(env.state)  # For debugging in case of failure.

    # Start over from the same benchmark and replay the recorded actions.
    env.reset(benchmark=benchmark)
    for i, (action, observation, reward, done) in enumerate(trajectory, start=1):
        print(f"Replaying step {i}: {env.action_space.flags[action]}")
        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info
        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)
def test_deterministic_action(env: LlvmEnv, benchmark_name: str, action_name: str):
    """Apply a single action repeatedly from the same starting state and check
    that the LLVM-IR it produces is identical every time. Parametrized over
    every (benchmark, action) pair, so this expands to many tests.

    Limitations of this test:

        * All starting states are unoptimized benchmarks, so a pass whose
          effect depends on earlier passes is not exercised.

        * The action is repeated a fixed number of times; vanishingly rare
          non-determinism can still slip through undetected.
    """
    env.observation_space = "Ir"
    digests = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        # Reset to the same benchmark and record the pre-action IR digest.
        ir_before = env.reset(benchmark=benchmark_name)
        checksum_before = sha1(ir_before)

        ir_after, _, done, _ = env.step(env.action_space.names.index(action_name))
        assert not done
        digests.add(sha1(ir_after))
        if len(digests) != 1:
            pytest.fail(f"Repeating the {action_name} action {i} times on "
                        f"{benchmark_name} produced different states")

        # An action which has no effect is not likely to be nondeterministic.
        if next(iter(digests)) == checksum_before:
            break
def test_fork_spaces_are_same(env: LlvmEnv):
    """A forked environment inherits the parent's observation space, reward
    space, and benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cbench-v1/crc32")

    with env.fork() as fkd:
        assert fkd.benchmark == env.benchmark
        assert fkd.observation_space == env.observation_space
        assert fkd.reward_space == env.reward_space
def train_and_run(env: LlvmEnv) -> None:
    """Run tabular Q learning on an environment.

    Trains a Q-table on a forked copy of *env*, then rolls out the learned
    policy on *env* itself.

    :param env: The environment to train on and evaluate against.
    """
    FLAGS.log_every = 0  # Disable printing to stdout
    q_table: Dict[StateActionTuple, float] = {}
    env.observation_space = "Autophase"
    training_env = env.fork()
    # Close the forked environment even if training raises, so its backing
    # service is never leaked.
    try:
        train(q_table, training_env)
    finally:
        training_env.close()
    rollout(q_table, env, printout=False)
def test_gvn_sink_non_determinism(env: LlvmEnv, benchmark_name: str):
    """Regression test for -gvn-sink non-determinism.

    Runs the -gvn-sink pass repeatedly from the same starting state and checks
    that the resulting IR digest is always the same.

    See: https://github.com/facebookresearch/CompilerGym/issues/46
    """
    env.observation_space = "Ir"
    checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        env.reset(benchmark=benchmark_name)
        ir, _, done, _ = env.step(env.action_space.names.index("-gvn-sink"))
        assert not done
        # Hash the IR in one shot rather than via an incremental hasher. This
        # also avoids shadowing the module-level `sha1` helper with a local.
        checksums.add(hashlib.sha1(ir.encode("utf-8")).hexdigest())
        if len(checksums) != 1:
            pytest.fail(
                f"Repeating the -gvn-sink action {i} times on {benchmark_name} "
                "produced different states")
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test generates a random trajectory and checks that fork() produces
    an equivalent state. It then runs a second trajectory on the two
    environments to check that behavior is consistent across them.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    env.reset()
    print(f"Running fuzz test of environment {env.benchmark}")

    # Take a few warmup steps to get an environment in a random state.
    for _ in range(PRE_FORK_ACTIONS):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:  # Broken episode, restart.
            break
    else:
        # for/else: only reached when every warmup step completed without the
        # episode ending.
        # Fork the environment and check that the states are equivalent.
        fkd = env.fork()
        try:
            print(env.state)  # For debugging in case of error.
            assert env.state == fkd.state
            # Check that environment states remain equal if identical
            # subsequent steps are taken.
            for _ in range(POST_FORK_ACTIONS):
                # Apply the same sampled action to both environments and
                # compare their outputs elementwise.
                action = env.action_space.sample()
                observation_a, reward_a, done_a, _ = env.step(action)
                observation_b, reward_b, done_b, _ = fkd.step(action)
                print(env.state)  # For debugging in case of error.
                assert done_a == done_b
                np.testing.assert_array_almost_equal(observation_a, observation_b)
                if reward_a != reward_b:
                    pytest.fail(
                        f"Parent environment produced reward {reward_a}, fork produced reward {reward_b}"
                    )
                if done_a:
                    break  # Broken episode, we're done.
            assert env.state == fkd.state
        finally:
            # Always release the forked environment's resources.
            fkd.close()
def test_fuzz(env: LlvmEnv, observation_space: str, reward_space: str):
    """Exercise a random trajectory on a program produced by llvm-stress."""
    benchmark = env.datasets["generator://llvm-stress-v0"].random_benchmark()
    print(benchmark.uri)  # For debugging in case of failure.

    env.observation_space = observation_space
    env.reward_space = reward_space
    try:
        env.reset(benchmark=benchmark)
    except BenchmarkInitError:
        # llvm-stress can emit programs the environment rejects.
        return

    apply_random_trajectory(
        env,
        random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE,
        timeout=10,
    )
    print(env.state)  # For debugging in case of failure.
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test produces a random trajectory, resets the environment, then
    replays the trajectory and checks that it produces the same state.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    benchmark = random.choice(BENCHMARK_NAMES)
    env.reset(benchmark=benchmark)

    trajectory = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE)
    print(env.state)  # For debugging in case of failure.

    # Pass the benchmark explicitly rather than relying on reset() implicitly
    # reusing the previous benchmark, consistent with the other replay tests.
    env.reset(benchmark=benchmark)
    for i, (action, observation, reward, done) in enumerate(trajectory, start=1):
        print(f"Replaying step {i}: {env.action_space.flags[action]}")
        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info
        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)
def run(env: LlvmEnv, model_path: str = "./H10-N4000-INSTCOUNTNORM.pth") -> None:
    """Load a trained Q-network and roll it out on the environment.

    :param env: The environment to roll the agent out on.
    :param model_path: Path to the saved Q-network state dict. Defaults to the
        previously hard-coded checkpoint; the agent dimensions below
        (n_actions=15, input_dims=[69]) must match the checkpoint's shape.
    """
    agent = Agent(n_actions=15, input_dims=[69])
    env.observation_space = "InstCountNorm"
    agent.Q_eval.load_state_dict(torch.load(model_path))
    rollout(agent, env)