def test_deterministic_action(env: LlvmEnv, benchmark_name: str,
                              action_name: str):
    """Run an action multiple times from the same starting state and check that
    the generated LLVM-IR is the same.

    Do this for every combination of benchmark and action. This generates many
    tests.

    Caveats of this test:

        * The initial states are all unoptimized benchmarks. If a pass depends
          on other passes to take effect, it will not be tested.

        * Non-determinism is tested by running the action 20 times. Extremely
          unlikely non-determinism may not be detected.
    """
    env.observation_space = "Ir"

    checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        ir = env.reset(benchmark=benchmark_name)
        checksum_before = sha1(ir)

        ir, _, done, _ = env.step(env.action_space.names.index(action_name))
        assert not done
        checksums.add(sha1(ir))

        if len(checksums) != 1:
            pytest.fail(f"Repeating the {action_name} action {i} times on "
                        f"{benchmark_name} produced different states")

        # An action which has no effect is not likely to be nondeterministic.
        if list(checksums)[0] == checksum_before:
            break
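
Both determinism tests above rely on a sha1() helper and an ACTION_REPTITION_COUNT constant defined outside these snippets. A minimal sketch of what they might look like, assuming the "Ir" observation is returned as a plain string and taking the repetition count of 20 from the docstrings:

import hashlib

# Per the docstrings above, each action is repeated 20 times.
ACTION_REPTITION_COUNT = 20


def sha1(ir: str) -> str:
    """Checksum the textual LLVM-IR so that states can be compared cheaply."""
    return hashlib.sha1(ir.encode("utf-8")).hexdigest()
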
Example No. 2
def test_fuzz(env: LlvmEnv):
    """Run an action multiple times from the same starting state and check that
    the generated LLVM-IR is the same.

    Caveats of this test:

        * The initial state is an unoptimized benchmark. If a pass depends
          on other passes to take effect, it will not be tested.

        * Non-determinism is tested by running the action 20 times. Extremely
          unlikely non-determinism may not be detected.
    """
    action = env.action_space.sample()
    action_name = env.action_space.names[action]
    benchmark = random.choice(BENCHMARK_NAMES)

    env.observation_space = "Ir"

    checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        ir = env.reset(benchmark=benchmark)
        checksum_before = sha1(ir)

        ir, _, done, _ = env.step(action)
        assert not done
        checksums.add(sha1(ir))

        if len(checksums) != 1:
            pytest.fail(f"Repeating the {action_name} action {i} times on "
                        f"{benchmark} produced different states")

        # An action which has no effect is not likely to be nondeterministic.
        if list(checksums)[0] == checksum_before:
            break
Example No. 3
def test_fork_child_process_is_not_orphaned(env: LlvmEnv):
    env.reset("cbench-v1/crc32")
    with env.fork() as fkd:
        # Check that both environments share the same service.
        assert isinstance(env.service.connection.process, subprocess.Popen)
        assert isinstance(fkd.service.connection.process, subprocess.Popen)

        assert env.service.connection.process.pid == fkd.service.connection.process.pid
        process = env.service.connection.process

        # Sanity check that both services are alive.
        assert not env.service.connection.process.poll()
        assert not fkd.service.connection.process.poll()

        # Close the parent service.
        env.close()

        # Check that the parent's service reference is cleared but the shared
        # service process is still alive.
        assert not env.service
        assert not fkd.service.connection.process.poll()

        # Close the forked service.
        fkd.close()

        # Check that the service has been killed.
        assert process.poll() is not None
Example No. 4
def test_invalid_benchmark_missing_file(env: LlvmEnv):
    benchmark = Benchmark(uri="benchmark://new")

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == "No program set"
Example No. 5
def test_fork_chain_child_processes_are_not_orphaned(env: LlvmEnv):
    env.reset("cbench-v1/crc32")

    # Create a chain of forked environments.
    a = env.fork()
    b = a.fork()
    c = b.fork()
    d = c.fork()

    try:
        # Sanity check that they share the same underlying service.
        assert (env.service.connection.process == a.service.connection.process
                == b.service.connection.process == c.service.connection.process
                == d.service.connection.process)
        proc = env.service.connection.process
        # Kill the forked environments one by one.
        a.close()
        assert proc.poll() is None
        b.close()
        assert proc.poll() is None
        c.close()
        assert proc.poll() is None
        d.close()
        assert proc.poll() is None
        # Close the final environment; the refcount drops to zero and the
        # service is shut down.
        env.close()
        assert proc.poll() is not None
    finally:
        a.close()
        b.close()
        c.close()
        d.close()
Example No. 6
def test_fork_twice_test(env: LlvmEnv):
    """Test that fork() on a forked environment works."""
    env.reset(benchmark="cbench-v1/crc32")
    with env.fork() as fork_a:
        with fork_a.fork() as fork_b:
            assert env.state == fork_a.state
            assert fork_a.state == fork_b.state
Example No. 7
def test_validate_known_bad_trajectory(env: LlvmEnv, state):
    env.apply(state)
    for _ in range(VALIDATION_FLAKINESS):
        result = env.validate()
        if result.okay():
            pytest.fail(
                "Validation succeeded on state where it should have failed")
Example No. 8
def test_non_validatable_benchmark_callback(
        env: LlvmEnv, non_validatable_benchmark_name: str):
    """Run the validation routine on unoptimized versions of all benchmarks."""
    env.reset(benchmark=non_validatable_benchmark_name)
    cb = get_llvm_benchmark_validation_callback(env)

    assert cb is None
Example No. 9
def test_fork_state_fuzz_test(env: LlvmEnv):
    """Run random episodes and check that fork() produces equivalent state."""
    end_time = time() + FUZZ_TIME_SECONDS
    while time() < end_time:
        env.reset(benchmark="cBench-v0/dijkstra")

        # Take a few warmup steps to get an environment in a random state.
        for _ in range(A_FEW_RANDOM_ACTIONS):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:  # Broken episode, restart.
                break
        else:
            # Fork the environment and check that the states are equivalent.
            new_env = env.fork()
            try:
                assert env.state == new_env.state
                # Check that environment states remain equal if identical
                # subsequent steps are taken.
                for _ in range(A_FEW_RANDOM_ACTIONS):
                    action = env.action_space.sample()
                    _, _, done_a, _ = env.step(action)
                    _, _, done_b, _ = new_env.step(action)
                    assert done_a == done_b
                    if done_a:  # Broken episode, restart.
                        break
                    assert env.state == new_env.state
            finally:
                new_env.close()
Example No. 10
def test_make_benchmark_from_command_line_system_includes(env: LlvmEnv):
    with temporary_working_directory():
        with open("in.c", "w") as f:
            f.write("""
#include <stdio.h>
int main() { return 0; }
""")
        env.make_benchmark_from_command_line("gcc in.c")
Example No. 11
def test_fork_state(env: LlvmEnv):
    env.reset("cbench-v1/crc32")
    env.step(0)
    assert env.actions == [0]

    with env.fork() as fkd:
        assert fkd.benchmark == env.benchmark
        assert fkd.actions == env.actions
Example No. 12
def test_invalid_benchmark_data(env: LlvmEnv):
    benchmark = Benchmark.from_file_contents("benchmark://new",
                                             "Invalid bitcode".encode("utf-8"))

    with pytest.raises(
            ValueError,
            match='Failed to parse LLVM bitcode: "benchmark://new"'):
        env.reset(benchmark=benchmark)
Example No. 13
def test_with_statement(env: LlvmEnv):
    """Test that the `with` statement context manager works on forks."""
    env.reset("cbench-v1/crc32")
    env.step(0)
    with env.fork() as fkd:
        assert fkd.in_episode
        assert fkd.actions == [0]
    assert not fkd.in_episode
    assert env.in_episode
Example No. 14
def test_fork_spaces_are_same(env: LlvmEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")

    with env.fork() as fkd:
        assert fkd.observation_space == env.observation_space
        assert fkd.reward_space == env.reward_space
        assert fkd.benchmark == env.benchmark
Example No. 15
def test_invalid_benchmark_missing_file(env: LlvmEnv):
    benchmark = Benchmark(
        BenchmarkProto(
            uri="benchmark://new",
        )
    )

    with pytest.raises(ValueError, match="No program set"):
        env.reset(benchmark=benchmark)
Example No. 16
def test_invalid_benchmark_data(env: LlvmEnv):
    benchmark = Benchmark(
        uri="benchmark://new",
        program=File(contents="Invalid bitcode".encode("utf-8")))

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == 'Failed to parse LLVM bitcode: "benchmark://new"'
Example No. 17
def test_benchmark_path_empty_file(env: LlvmEnv):
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        (tmpdir / "test.bc").touch()

        benchmark = Benchmark.from_file("benchmark://new", tmpdir / "test.bc")

        with pytest.raises(ValueError, match="Failed to parse LLVM bitcode"):
            env.reset(benchmark=benchmark)
Example No. 18
def test_benchmark_path_invalid_protocol(env: LlvmEnv):
    benchmark = Benchmark(uri="benchmark://new",
                          program=File(uri="invalid_protocol://test"))

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert (str(ctx.value) ==
            'Unsupported benchmark URI protocol: "invalid_protocol://test"')
Example No. 19
def test_make_benchmark_single_bitcode(env: LlvmEnv):
    benchmark = llvm.make_benchmark(EXAMPLE_BITCODE_FILE)

    assert benchmark.uri == f"file:///{EXAMPLE_BITCODE_FILE}"
    assert benchmark.program.uri == f"file:///{EXAMPLE_BITCODE_FILE}"

    env.reset(benchmark=benchmark)
    assert env.benchmark == benchmark.uri
    assert (env.observation["IrInstructionCount"] ==
            EXAMPLE_BITCODE_IR_INSTRUCTION_COUNT)
Example No. 20
def test_invalid_benchmark_path_contents(env: LlvmEnv):
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        with open(str(tmpdir / "test.bc"), "w") as f:
            f.write("Invalid bitcode")

        benchmark = Benchmark.from_file("benchmark://new", tmpdir / "test.bc")

        with pytest.raises(ValueError, match="Failed to parse LLVM bitcode"):
            env.reset(benchmark=benchmark)
Example No. 21
def test_validate_cBench_unoptimized(env: LlvmEnv, benchmark_name: str):
    """Run the validation routine on unoptimized versions of all cBench benchmarks."""
    env.reset(benchmark=benchmark_name)
    cb = datasets.get_llvm_benchmark_validation_callback(env)

    if benchmark_name in CBENCH_VALIDATION_EXCLUDE_LIST:
        assert cb is None
    else:
        assert cb
        assert cb(env) is None
Example No. 22
def train_and_run(env: LlvmEnv) -> None:
    """Run tabular Q learning on an environment"""
    FLAGS.log_every = 0  # Disable printing to stdout

    q_table: Dict[StateActionTuple, float] = {}
    env.observation_space = "Autophase"
    training_env = env.fork()
    train(q_table, training_env)
    training_env.close()
    rollout(q_table, env, printout=False)
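
The train() and rollout() helpers and the StateActionTuple key type come from the tabular-Q example this snippet belongs to and are not shown here. As a rough sketch of the idea, a Q-table keyed by (state, action) pairs might be queried greedily as below; the names and key construction are illustrative assumptions, not the example's actual code.

from typing import Dict, Tuple

# Hypothetical key type: a hashable summary of the Autophase observation plus
# an action index. The real StateActionTuple may be defined differently.
StateActionKey = Tuple[Tuple[int, ...], int]


def greedy_action(q_table: Dict[StateActionKey, float],
                  observation, num_actions: int) -> int:
    """Pick the action with the highest learned value for this observation."""
    state = tuple(int(x) for x in observation)
    return max(range(num_actions), key=lambda a: q_table.get((state, a), 0.0))
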
Example No. 23
def test_make_benchmark_from_command_line_no_system_includes(env: LlvmEnv):
    with temporary_working_directory():
        with open("in.c", "w") as f:
            f.write("""
#include <stdio.h>
int main() { return 0; }
""")
        with pytest.raises(BenchmarkInitError, match="stdio.h"):
            env.make_benchmark_from_command_line("gcc in.c",
                                                 system_includes=False)
Example No. 24
def test_select_best_action_closed_environment(env: LlvmEnv):
    """Test that select_best_action() recovers from an environment whose service
    has closed."""
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")
    with ThreadPoolExecutor() as executor:
        best_a = select_best_action(env, executor)
        env.close()
        best_b = select_best_action(env, executor)
        assert best_a == best_b
Example No. 25
def test_benchmark_path_not_found(env: LlvmEnv):
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        benchmark = Benchmark(uri="benchmark://new",
                              program=File(uri=f"file:///{tmpdir}/not_found"))

        with pytest.raises(FileNotFoundError) as ctx:
            env.reset(benchmark=benchmark)

    assert str(ctx.value) == f'File not found: "{tmpdir}/not_found"'
Example No. 26
    def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float:
        """Compute the final reward of the environment.

        Note that this may modify the environment state. You should call
        :code:`reset()` before continuing to use the environment after this.
        """
        # Reapply the environment state in a retry loop.
        actions = list(env.actions)
        env.reset()
        for i in range(1, 5 + 1):
            _, _, done, info = env.step(actions)
            if not done:
                break
            logger.warning(
                "Attempt %d to apply actions during final reward failed: %s",
                i,
                info.get("error_details"),
            )
        else:
            raise ValueError("Failed to replay environment's actions")

        if self.value == OptimizationTarget.CODESIZE:
            return env.observation.IrInstructionCountOz() / max(
                env.observation.IrInstructionCount(), 1)

        if self.value == OptimizationTarget.BINSIZE:
            return env.observation.ObjectTextSizeOz() / max(
                env.observation.ObjectTextSizeBytes(), 1)

        if self.value == OptimizationTarget.RUNTIME:
            with _RUNTIME_LOCK:
                with compiler_gym.make("llvm-v0",
                                       benchmark=env.benchmark) as new_env:
                    new_env.reset()
                    new_env.runtime_observation_count = runtime_count
                    new_env.runtime_warmup_count = 0
                    new_env.apply(env.state)
                    final_runtimes = new_env.observation.Runtime()
                    assert len(final_runtimes) == runtime_count

                    new_env.reset()
                    new_env.send_param("llvm.apply_baseline_optimizations",
                                       "-O3")
                    o3_runtimes = new_env.observation.Runtime()
                    assert len(o3_runtimes) == runtime_count

                logger.debug("O3 runtimes: %s", o3_runtimes)
                logger.debug("Final runtimes: %s", final_runtimes)
                speedup = np.median(o3_runtimes) / max(
                    np.median(final_runtimes), 1e-12)
                logger.debug("Speedup: %.4f", speedup)

                return speedup

        assert False, f"Unknown OptimizationTarget: {self.value}"
Example No. 27
def test_fork_state(env: LlvmEnv):
    env.reset("cBench-v0/crc32")
    env.step(0)
    assert env.actions == [0]

    new_env = env.fork()
    try:
        assert new_env.benchmark == env.benchmark
        assert new_env.actions == env.actions
    finally:
        new_env.close()
Example No. 28
def test_no_validation_callback_for_custom_benchmark(env: LlvmEnv):
    """Test that a custom benchmark has no validation callback."""
    with tempfile.TemporaryDirectory() as d:
        p = Path(d) / "example.c"
        with open(p, "w") as f:
            print("int main() {return 0;}", file=f)
        benchmark = env.make_benchmark(p)

    env.reset(benchmark=benchmark)

    assert not env.benchmark.is_validatable()
Example No. 29
def test_episode_reward_with_non_default_reward_space(env: LlvmEnv):
    """Test that episode_reward is not updated when custom rewards passed to
    step()."""
    env.reward_space = "IrInstructionCountOz"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0
    _, rewards, _, _ = env.step(
        env.action_space["-mem2reg"],
        rewards=["IrInstructionCount"],
    )
    assert rewards[0] > 0
    assert env.episode_reward == 0
Example No. 30
def test_benchmark_path_empty_file(env: LlvmEnv):
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        (tmpdir / "test.bc").touch()

        benchmark = Benchmark(uri="benchmark://new",
                              program=File(uri=f"file:///{tmpdir}/test.bc"))

        with pytest.raises(ValueError) as ctx:
            env.reset(benchmark=benchmark)

    assert str(ctx.value) == f'File is empty: "{tmpdir}/test.bc"'