Example #1
def test_contains_with_float_scalar_range():
    space = Sequence(
        name="test",
        size_range=(3, 3),
        dtype=float,
        scalar_range=Scalar(name="test", min=0, max=1, dtype=float),
    )
    assert space.contains([0.0, 0.0, 0.0])
    assert space.contains([0.1, 1.0, 0.5])
    assert not space.contains([0.0, 0.0, -1.0])  # out of bounds
    assert not space.contains([0.0, 0, 0.1])  # wrong dtype
    assert not space.contains([0.0, 0])  # wrong shape
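Note: the snippets in this listing assume a common set of imports. A minimal sketch of what they rely on is below; the module paths are inferred from the identifiers used and may differ between CompilerGym versions, and individual examples additionally use gym, pathlib, typing, and CompilerGym's proto/converter modules.

# Assumed imports for the test snippets in this listing (inferred from the
# code; exact module paths may vary between CompilerGym versions).
import numpy as np
import pytest

from compiler_gym.spaces import Box, Dict, Scalar, Sequence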
Example #2
def test_convert_to_double_sequence_space():
    seq = Sequence(
        name=None,
        dtype=np.float64,
        size_range=(1, 2),
        scalar_range=Scalar(name=None, min=4.0, max=5.0, dtype=np.float64),
    )
    converted_seq = py_converters.convert_to_ranged_sequence_space(seq)
    assert isinstance(converted_seq, DoubleSequenceSpace)
    assert converted_seq.length_range.min == 1
    assert converted_seq.length_range.max == 2
    assert isinstance(converted_seq.scalar_range, DoubleRange)
    assert converted_seq.scalar_range.min == 4.0
    assert converted_seq.scalar_range.max == 5.0
Example #3
def test_convert_to_float_sequence_space():
    seq = Sequence(
        name=None,
        dtype=np.float32,
        size_range=(1, 2),
        scalar_range=Scalar(name=None, min=4, max=5, dtype=np.float32),
    )
    converted_seq = py_converters.convert_to_ranged_sequence_space(seq)
    assert isinstance(converted_seq, FloatSequenceSpace)
    assert converted_seq.length_range.min == 1
    assert converted_seq.length_range.max == 2
    assert isinstance(converted_seq.scalar_range, FloatRange)
    assert np.isclose(converted_seq.scalar_range.min, 4)
    assert np.isclose(converted_seq.scalar_range.max, 5)
Example #4
def test_convert_to_int64_sequence_space():
    seq = Sequence(
        name=None,
        dtype=np.int64,
        size_range=(1, 2),
        scalar_range=Scalar(name=None, min=4, max=5, dtype=np.int64),
    )
    converted_seq = py_converters.convert_to_ranged_sequence_space(seq)
    assert isinstance(converted_seq, Int64SequenceSpace)
    assert converted_seq.length_range.min == 1
    assert converted_seq.length_range.max == 2
    assert isinstance(converted_seq.scalar_range, Int64Range)
    assert converted_seq.scalar_range.min == 4
    assert converted_seq.scalar_range.max == 5
Example #5
def test_convert_to_boolean_sequence_space():
    seq = Sequence(
        name=None,
        dtype=bool,
        size_range=(1, 2),
        scalar_range=Scalar(name=None, min=True, max=False, dtype=bool),
    )
    converted_seq = py_converters.convert_to_ranged_sequence_space(seq)
    assert isinstance(converted_seq, BooleanSequenceSpace)
    assert converted_seq.length_range.min == 1
    assert converted_seq.length_range.max == 2
    assert isinstance(converted_seq.scalar_range, BooleanRange)
    assert converted_seq.scalar_range.min == True  # noqa: E712
    assert converted_seq.scalar_range.max == False  # noqa: E712
Example #6
def test_observation_spaces(env: CompilerEnv):
    """Test that the environment reports the service's observation spaces."""
    env.reset()
    assert env.observation.spaces.keys() == {"ir", "features", "runtime"}
    assert env.observation.spaces["ir"].space == Sequence(
        name="test", size_range=(0, None), dtype=str, opaque_data_format="")
    assert env.observation.spaces["features"].space == Box(name="test",
                                                           shape=(3, ),
                                                           low=-100,
                                                           high=100,
                                                           dtype=int)
    assert env.observation.spaces["runtime"].space == Scalar(name="test",
                                                             min=0,
                                                             max=np.inf,
                                                             dtype=float)
Example #7
def test_observation_spaces_failing_because_of_bug(gcc_bin: str):
    """Test that the environment reports the service's observation spaces."""
    with gym.make("gcc-v0", gcc_bin=gcc_bin) as env:
        env.reset()
        assert env.observation.spaces.keys() == {
            "asm_hash",
            "asm_size",
            "asm",
            "choices",
            "command_line",
            "instruction_counts",
            "obj_hash",
            "obj_size",
            "obj",
            "rtl",
            "source",
        }
        assert env.observation.spaces["obj_size"].space == Scalar(
            name="obj_size", min=-1, max=np.iinfo(np.int64).max, dtype=int)
        assert env.observation.spaces["asm"].space == Sequence(
            name="asm", size_range=(0, None), dtype=str, opaque_data_format="")
Example #8
def test_bytes_contains():
    space = Sequence(name="test", size_range=(0, None), dtype=bytes)
    assert space.contains(b"Hello, world!")
    assert space.contains(b"")
    assert not space.contains("Hello, world!")
Example #9
def test_observation_spaces(env: CompilerEnv):
    """Test that the environment reports the service's observation spaces."""
    env.reset()
    assert env.observation.spaces.keys() == {
        "ir",
        "Inst2vec",
        "Autophase",
        "AutophaseDict",
        "Programl",
        "runtime",
        "size",
    }
    assert env.observation.spaces["ir"].space == Sequence(
        name="ir",
        size_range=(0, np.iinfo(int).max),
        dtype=str,
    )
    assert env.observation.spaces["Inst2vec"].space == Sequence(
        name="Inst2vec",
        size_range=(0, np.iinfo(int).max),
        scalar_range=Scalar(
            name=None,
            min=np.iinfo(np.int64).min,
            max=np.iinfo(np.int64).max,
            dtype=np.int64,
        ),
        dtype=int,
    )
    assert env.observation.spaces["Autophase"].space == Sequence(
        name="Autophase",
        size_range=(len(AUTOPHASE_FEATURE_NAMES),
                    len(AUTOPHASE_FEATURE_NAMES)),
        scalar_range=Scalar(
            name=None,
            min=np.iinfo(np.int64).min,
            max=np.iinfo(np.int64).max,
            dtype=np.int64,
        ),
        dtype=int,
    )
    assert env.observation.spaces["AutophaseDict"].space == Dict(
        name="AutophaseDict",
        spaces={
            name: Scalar(name=None,
                         min=0,
                         max=np.iinfo(np.int64).max,
                         dtype=np.int64)
            for name in AUTOPHASE_FEATURE_NAMES
        },
    )
    assert env.observation.spaces["Programl"].space == Sequence(
        name="Programl",
        size_range=(0, np.iinfo(int).max),
        dtype=str,
    )
    assert env.observation.spaces["runtime"].space == Scalar(name="runtime",
                                                             min=0,
                                                             max=np.inf,
                                                             dtype=float)
    assert env.observation.spaces["size"].space == Scalar(name="size",
                                                          min=0,
                                                          max=np.inf,
                                                          dtype=float)
Example #10
def test_str_contains_too_short():
    space = Sequence(name="test", size_range=(3, None), dtype=str)
    assert space.contains("Hello, world!")
    assert not space.contains("")
    assert not space.contains([1, 2, 3])
Example #11
def test_int_contains():
    space = Sequence(name="test", size_range=(5, 5), dtype=int)
    assert not space.contains(list(range(4)))
    assert space.contains(list(range(5)))
    assert not space.contains(list(range(6)))
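The contains() tests above and in Example #1 exercise three checks: the sequence length bounds, the element dtype, and the optional per-element scalar bounds. The sketch below only mirrors that observed behaviour for illustration; it is not CompilerGym's implementation.

def sequence_contains(value, size_range, dtype, scalar_range=None):
    # Length check against (min, max), where max=None means unbounded.
    lo, hi = size_range
    if len(value) < lo or (hi is not None and len(value) > hi):
        return False
    # Element dtype check.
    if not all(isinstance(v, dtype) for v in value):
        return False
    # Optional per-element bounds check, given as a (min, max) pair.
    if scalar_range is not None:
        s_lo, s_hi = scalar_range
        if not all(s_lo <= v <= s_hi for v in value):
            return False
    return True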
Example #12
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.actions: List[int] = []
        self.datasets_site_path = site_data_path(
            "llvm/10.0.0/bitcode_benchmarks")

        # Register the LLVM datasets.
        self.datasets_site_path.mkdir(parents=True, exist_ok=True)
        self.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True)
        for dataset in LLVM_DATASETS:
            self.register_dataset(dataset)

        self.inst2vec = _INST2VEC_ENCODER

        self.observation.spaces["CpuInfo"].space = DictSpace({
            "name":
            Sequence(size_range=(0, None), dtype=str),
            "cores_count":
            Scalar(min=None, max=None, dtype=int),
            "l1i_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l1i_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l1d_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l1d_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l2_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l2_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l3_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l3_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l4_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l4_cache_count":
            Scalar(min=None, max=None, dtype=int),
        })

        self.observation.add_derived_space(
            id="Inst2vecPreprocessedText",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=str),
            cb=lambda base_observation: self.inst2vec.preprocess(
                base_observation),
            default_value="",
        )
        self.observation.add_derived_space(
            id="Inst2vecEmbeddingIndices",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=np.int32),
            cb=lambda base_observation: self.inst2vec.encode(
                self.inst2vec.preprocess(base_observation)),
            default_value=np.array([self.inst2vec.vocab["!UNK"]]),
        )
        self.observation.add_derived_space(
            id="Inst2vec",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=np.ndarray),
            cb=lambda base_observation: self.inst2vec.embed(
                self.inst2vec.encode(self.inst2vec.preprocess(base_observation)
                                     )),
            default_value=np.vstack(
                [self.inst2vec.embeddings[self.inst2vec.vocab["!UNK"]]]),
        )

        self.observation.add_derived_space(
            id="AutophaseDict",
            base_id="Autophase",
            space=DictSpace({
                name: Scalar(min=0, max=None, dtype=int)
                for name in AUTOPHASE_FEATURE_NAMES
            }),
            cb=lambda base_observation: {
                name: val
                for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
            },
        )
Example #13
def test_sample():
    space = Sequence(name="test", size_range=(0, None), dtype=int)
    with pytest.raises(NotImplementedError):
        space.sample()
Example #14
def test_observation_space_list(env: CompilerEnv):
    env.reset()
    env.observation.spaces = {
        "ir": Sequence(size_range=(0, None)),
        "features": Box(shape=(3, ), low=-100, high=100),
    }
Example #15
def test_convert_to_bytes_sequence_space():
    seq = Sequence(name=None, dtype=bytes, size_range=(1, 2))
    converted_seq = py_converters.convert_to_bytes_sequence_space(seq)
    assert isinstance(converted_seq, BytesSequenceSpace)
    assert converted_seq.length_range.min == 1
    assert converted_seq.length_range.max == 2
Example #16
    def __init__(
        self,
        *args,
        benchmark: Optional[Union[str, Benchmark]] = None,
        datasets_site_path: Optional[Path] = None,
        **kwargs,
    ):
        # First perform a one-time download of LLVM binaries that are needed by
        # the LLVM service and are not included by the pip-installed package.
        download_llvm_files()
        self.inst2vec = _INST2VEC_ENCODER
        super().__init__(
            *args,
            **kwargs,
            # Set a default benchmark for use.
            benchmark=benchmark or "cbench-v1/qsort",
            datasets=_get_llvm_datasets(site_data_base=datasets_site_path),
            rewards=[
                CostFunctionReward(
                    name="IrInstructionCount",
                    cost_function="IrInstructionCount",
                    init_cost_function="IrInstructionCountO0",
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=False,
                ),
                NormalizedReward(
                    name="IrInstructionCountNorm",
                    cost_function="IrInstructionCount",
                    init_cost_function="IrInstructionCountO0",
                    max=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=False,
                ),
                BaselineImprovementNormalizedReward(
                    name="IrInstructionCountO3",
                    cost_function="IrInstructionCount",
                    baseline_cost_function="IrInstructionCountO3",
                    init_cost_function="IrInstructionCountO0",
                    success_threshold=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=False,
                ),
                BaselineImprovementNormalizedReward(
                    name="IrInstructionCountOz",
                    cost_function="IrInstructionCount",
                    baseline_cost_function="IrInstructionCountOz",
                    init_cost_function="IrInstructionCountO0",
                    success_threshold=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=False,
                ),
                CostFunctionReward(
                    name="ObjectTextSizeBytes",
                    cost_function="ObjectTextSizeBytes",
                    init_cost_function="ObjectTextSizeO0",
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=True,
                ),
                NormalizedReward(
                    name="ObjectTextSizeNorm",
                    cost_function="ObjectTextSizeBytes",
                    init_cost_function="ObjectTextSizeO0",
                    max=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=True,
                ),
                BaselineImprovementNormalizedReward(
                    name="ObjectTextSizeO3",
                    cost_function="ObjectTextSizeBytes",
                    init_cost_function="ObjectTextSizeO0",
                    baseline_cost_function="ObjectTextSizeO3",
                    success_threshold=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=True,
                ),
                BaselineImprovementNormalizedReward(
                    name="ObjectTextSizeOz",
                    cost_function="ObjectTextSizeBytes",
                    init_cost_function="ObjectTextSizeO0",
                    baseline_cost_function="ObjectTextSizeOz",
                    success_threshold=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=True,
                ),
                CostFunctionReward(
                    name="TextSizeBytes",
                    cost_function="TextSizeBytes",
                    init_cost_function="TextSizeO0",
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=True,
                ),
                NormalizedReward(
                    name="TextSizeNorm",
                    cost_function="TextSizeBytes",
                    init_cost_function="TextSizeO0",
                    max=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=True,
                ),
                BaselineImprovementNormalizedReward(
                    name="TextSizeO3",
                    cost_function="TextSizeBytes",
                    init_cost_function="TextSizeO0",
                    baseline_cost_function="TextSizeO3",
                    success_threshold=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=True,
                ),
                BaselineImprovementNormalizedReward(
                    name="TextSizeOz",
                    cost_function="TextSizeBytes",
                    init_cost_function="TextSizeO0",
                    baseline_cost_function="TextSizeOz",
                    success_threshold=1,
                    default_negates_returns=True,
                    deterministic=True,
                    platform_dependent=True,
                ),
            ],
            derived_observation_spaces=[
                {
                    "id":
                    "Inst2vecPreprocessedText",
                    "base_id":
                    "Ir",
                    "space":
                    Sequence(name="Inst2vecPreprocessedText",
                             size_range=(0, None),
                             dtype=str),
                    "translate":
                    self.inst2vec.preprocess,
                    "default_value":
                    "",
                },
                {
                    "id":
                    "Inst2vecEmbeddingIndices",
                    "base_id":
                    "Ir",
                    "space":
                    Sequence(
                        name="Inst2vecEmbeddingIndices",
                        size_range=(0, None),
                        dtype=np.int32,
                    ),
                    "translate":
                    lambda base_observation: self.inst2vec.encode(
                        self.inst2vec.preprocess(base_observation)),
                    "default_value":
                    np.array([self.inst2vec.vocab["!UNK"]]),
                },
                {
                    "id":
                    "Inst2vec",
                    "base_id":
                    "Ir",
                    "space":
                    Sequence(name="Inst2vec",
                             size_range=(0, None),
                             dtype=np.ndarray),
                    "translate":
                    lambda base_observation: self.inst2vec.embed(
                        self.inst2vec.encode(
                            self.inst2vec.preprocess(base_observation))),
                    "default_value":
                    np.vstack([
                        self.inst2vec.embeddings[self.inst2vec.vocab["!UNK"]]
                    ]),
                },
                {
                    "id":
                    "InstCountDict",
                    "base_id":
                    "InstCount",
                    "space":
                    DictSpace(
                        {
                            f"{name}Count": Scalar(name=f"{name}Count",
                                                   min=0,
                                                   max=None,
                                                   dtype=int)
                            for name in INST_COUNT_FEATURE_NAMES
                        },
                        name="InstCountDict",
                    ),
                    "translate":
                    lambda base_observation: {
                        f"{name}Count": val
                        for name, val in zip(INST_COUNT_FEATURE_NAMES,
                                             base_observation)
                    },
                },
                {
                    "id":
                    "InstCountNorm",
                    "base_id":
                    "InstCount",
                    "space":
                    Box(
                        name="InstCountNorm",
                        low=0,
                        high=1,
                        shape=(len(INST_COUNT_FEATURE_NAMES) - 1, ),
                        dtype=np.float32,
                    ),
                    "translate":
                    lambda base_observation: (base_observation[1:] / max(
                        base_observation[0], 1)).astype(np.float32),
                },
                {
                    "id":
                    "InstCountNormDict",
                    "base_id":
                    "InstCountNorm",
                    "space":
                    DictSpace(
                        {
                            f"{name}Density": Scalar(name=f"{name}Density",
                                                     min=0,
                                                     max=None,
                                                     dtype=int)
                            for name in INST_COUNT_FEATURE_NAMES[1:]
                        },
                        name="InstCountNormDict",
                    ),
                    "translate":
                    lambda base_observation: {
                        f"{name}Density": val
                        for name, val in zip(INST_COUNT_FEATURE_NAMES[1:],
                                             base_observation)
                    },
                },
                {
                    "id":
                    "AutophaseDict",
                    "base_id":
                    "Autophase",
                    "space":
                    DictSpace(
                        {
                            name: Scalar(name=name, min=0, max=None, dtype=int)
                            for name in AUTOPHASE_FEATURE_NAMES
                        },
                        name="AutophaseDict",
                    ),
                    "translate":
                    lambda base_observation: {
                        name: val
                        for name, val in zip(AUTOPHASE_FEATURE_NAMES,
                                             base_observation)
                    },
                },
            ],
        )

        # Mutable runtime configuration options that must be set on every call
        # to reset.
        self._runtimes_per_observation_count: Optional[int] = None
        self._runtimes_warmup_per_observation_count: Optional[int] = None

        cpu_info_spaces = [
            Sequence(name="name", size_range=(0, None), dtype=str),
            Scalar(name="cores_count", min=None, max=None, dtype=int),
            Scalar(name="l1i_cache_size", min=None, max=None, dtype=int),
            Scalar(name="l1i_cache_count", min=None, max=None, dtype=int),
            Scalar(name="l1d_cache_size", min=None, max=None, dtype=int),
            Scalar(name="l1d_cache_count", min=None, max=None, dtype=int),
            Scalar(name="l2_cache_size", min=None, max=None, dtype=int),
            Scalar(name="l2_cache_count", min=None, max=None, dtype=int),
            Scalar(name="l3_cache_size", min=None, max=None, dtype=int),
            Scalar(name="l3_cache_count", min=None, max=None, dtype=int),
            Scalar(name="l4_cache_size", min=None, max=None, dtype=int),
            Scalar(name="l4_cache_count", min=None, max=None, dtype=int),
        ]
        self.observation.spaces["CpuInfo"].space = DictSpace(
            {space.name: space
             for space in cpu_info_spaces},
            name="CpuInfo",
        )
Example #17
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.inst2vec = Inst2vecEncoder()

        self.register_derived_space(
            base_name="CpuInfo",
            derived_name="CpuInfoDict",
            derived_space=DictSpace({
                "name":
                Sequence(size_range=(0, None), dtype=str),
                "cores_count":
                Scalar(min=None, max=None, dtype=int),
                "l1i_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l1i_cache_count":
                Scalar(min=None, max=None, dtype=int),
                "l1d_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l1d_cache_count":
                Scalar(min=None, max=None, dtype=int),
                "l2_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l2_cache_count":
                Scalar(min=None, max=None, dtype=int),
                "l3_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l3_cache_count":
                Scalar(min=None, max=None, dtype=int),
                "l4_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l4_cache_count":
                Scalar(min=None, max=None, dtype=int),
            }),
            cb=lambda base_observation: base_observation,
        )

        self.register_derived_space(
            base_name="Ir",
            derived_name="Inst2vecPreprocessedText",
            derived_space=Sequence(size_range=(0, None), dtype=str),
            cb=lambda base_observation: self.inst2vec.preprocess(
                base_observation),
        )
        self.register_derived_space(
            base_name="Ir",
            derived_name="Inst2vecEmbeddingIndices",
            derived_space=Sequence(size_range=(0, None), dtype=np.int32),
            cb=lambda base_observation: self.inst2vec.encode(
                self.inst2vec.preprocess(base_observation)),
        )
        self.register_derived_space(
            base_name="Ir",
            derived_name="Inst2vec",
            derived_space=Sequence(size_range=(0, None), dtype=np.ndarray),
            cb=lambda base_observation: self.inst2vec.embed(
                self.inst2vec.encode(self.inst2vec.preprocess(base_observation)
                                     )),
        )

        self.register_derived_space(
            base_name="Autophase",
            derived_name="AutophaseDict",
            derived_space=DictSpace({
                name: Scalar(min=0, max=None, dtype=int)
                for name in AUTOPHASE_FEATURE_NAMES
            }),
            cb=lambda base_observation: {
                name: val
                for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
            },
        )
Example #18
def test_convert_to_string_space():
    space = Sequence(name=None, size_range=(1, 2), dtype=str)
    converted_space = py_converters.convert_to_string_space(space)
    assert isinstance(converted_space, StringSpace)
    assert converted_space.length_range.min == 1
    assert converted_space.length_range.max == 2
Example #19
def make_seq(scalar_range, dtype, defaults):
    return Sequence(
        size_range=scalar_range2tuple(scalar_range, defaults),
        dtype=dtype,
        opaque_data_format=proto.opaque_data_format,
    )
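The fragment above refers to scalar_range2tuple and a proto message from its enclosing scope. Below is a hedged sketch of what such a converter might look like, assuming a protobuf ScalarRange with optional min/max value fields; it is an illustration, not necessarily CompilerGym's exact helper.

import numpy as np

def scalar_range2tuple(scalar_range, defaults=(-np.inf, np.inf)):
    # Convert a ScalarRange-like proto into a (min, max) tuple, falling back
    # to the supplied defaults when a bound is unset. Field names are assumed.
    lo = scalar_range.min.value if scalar_range.HasField("min") else defaults[0]
    hi = scalar_range.max.value if scalar_range.HasField("max") else defaults[1]
    return (lo, hi)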
Example #20
def test_str_contains_too_long():
    space = Sequence(size_range=(0, 4), dtype=str)
    assert not space.contains("Hello, world!")
    assert space.contains("")
    assert not space.contains([1, 2, 3])