def test_populated_dataset_with_file_extension_filter(
        populated_dataset: FilesDataset):
    """Check URI enumeration once ``benchmark_file_suffix`` is ".jpg".

    Only two URIs are expected, and they are written here without the
    ".jpg" suffix; the reported size must agree with that count.
    """
    populated_dataset.benchmark_file_suffix = ".jpg"

    expected_uris = [
        "benchmark://test-v0/g",
        "benchmark://test-v0/b/d",
    ]
    observed_uris = list(populated_dataset.benchmark_uris())

    assert observed_uris == expected_uris
    assert populated_dataset.size == 2
def populated_dataset(request) -> FilesDataset:
    """Yield a ``FilesDataset`` rooted at a temporary tree of nine files.

    The tree under ``<tmp>/files`` holds ``e.txt`` (containing "e"),
    empty ``f.txt`` and ``g.jpg``, plus subdirectories ``a`` (two .txt
    files) and ``b`` (three .txt files and ``d.jpg``).

    ``memoize_uris`` is enabled when ``request.param`` equals
    "memoized-ids".
    NOTE(review): presumably registered as a parametrized pytest fixture;
    the decorator is not visible in this part of the file - confirm.

    The temporary directory is removed when the generator is resumed or
    closed after the yield.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tmp_root = Path(tmp)
        files_root = tmp_root / "files"

        # parents=True also creates ``files_root`` itself.
        (files_root / "a").mkdir(parents=True)
        (files_root / "b").mkdir()

        # The only file with content; created first, as before.
        (files_root / "e.txt").write_text("e")

        # Remaining files are empty. Creation order is kept unchanged in
        # case directory listing order depends on it.
        empty_files = (
            "f.txt",
            "g.jpg",
            "a/a.txt",
            "a/b.txt",
            "b/a.txt",
            "b/b.txt",
            "b/c.txt",
            "b/d.jpg",
        )
        for relative in empty_files:
            (files_root / relative).touch()

        use_memoized_uris = request.param == "memoized-ids"
        yield FilesDataset(
            name="benchmark://test-v0",
            description="",
            license="MIT",
            dataset_root=files_root,
            site_data_base=tmp_root / "site_data",
            memoize_uris=use_memoized_uris,
        )
def empty_dataset() -> FilesDataset:
    """Yield a ``FilesDataset`` whose root contains no files.

    ``dataset_root`` points at ``<tmp>/files``, which is never created
    here. The temporary directory is removed when the generator is
    resumed or closed after the yield.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tmp_root = Path(tmp)
        dataset = FilesDataset(
            name="benchmark://test-v0",
            description="",
            license="MIT",
            dataset_root=tmp_root / "files",
            site_data_base=tmp_root / "site_data",
        )
        yield dataset
def test_populated_dataset_random_benchmark(populated_dataset: FilesDataset):
    """Draw three random benchmarks with a seeded generator.

    The draws use ``np.random.default_rng(0)`` and the test requires the
    three resulting URIs to be pairwise distinct.
    """
    num_benchmarks = 3
    rng = np.random.default_rng(0)

    # A set collapses repeated URIs, so its size counts distinct draws.
    distinct_uris = set()
    for _ in range(num_benchmarks):
        benchmark = populated_dataset.random_benchmark(rng)
        distinct_uris.add(benchmark.uri)

    assert len(distinct_uris) == num_benchmarks
def test_populated_dataset(populated_dataset: FilesDataset):
    """Check the full URI listing and size of the populated dataset.

    The expected order is the three top-level files followed by the
    contents of ``a`` and then ``b``.
    """
    expected_uris = [
        "benchmark://test-v0/e.txt",
        "benchmark://test-v0/f.txt",
        "benchmark://test-v0/g.jpg",
        "benchmark://test-v0/a/a.txt",
        "benchmark://test-v0/a/b.txt",
        "benchmark://test-v0/b/a.txt",
        "benchmark://test-v0/b/b.txt",
        "benchmark://test-v0/b/c.txt",
        "benchmark://test-v0/b/d.jpg",
    ]

    # The checks run twice in a row.
    # NOTE(review): presumably so the second pass covers a cached or
    # memoized listing - confirm.
    for _ in range(2):
        observed_uris = list(populated_dataset.benchmark_uris())
        assert observed_uris == expected_uris
        assert populated_dataset.size == 9
def test_populated_dataset_benchmark_lookup_not_found(
        populated_dataset: FilesDataset):
    """Looking up a URI with no backing file must raise ``LookupError``.

    The error message is expected to start with "Benchmark not found: "
    followed by the requested URI.
    """
    missing_uri = "benchmark://test-v0/not/a/file"
    expected_message = (
        r"^Benchmark not found: benchmark://test-v0/not/a/file")

    with pytest.raises(LookupError, match=expected_message):
        populated_dataset.benchmark(missing_uri)
def test_populated_dataset_first_file(populated_dataset: FilesDataset):
    """The first benchmark yielded by ``benchmarks()`` must be ``e.txt``.

    Checks the benchmark URI, the URI stored in its proto, and that the
    program contents decode to "e".
    """
    expected_uri = "benchmark://test-v0/e.txt"

    first_benchmark = next(populated_dataset.benchmarks())
    proto = first_benchmark.proto

    assert first_benchmark.uri == expected_uri
    assert proto.uri == expected_uri
    assert proto.program.contents.decode("utf-8") == "e"
def test_populated_dataset_benchmark_lookup(populated_dataset: FilesDataset):
    """Look up ``e.txt`` by URI and verify the returned benchmark.

    Checks the benchmark URI, the URI stored in its proto, and that the
    program contents decode to "e".
    """
    target_uri = "benchmark://test-v0/e.txt"

    found = populated_dataset.benchmark(target_uri)
    proto = found.proto

    assert found.uri == target_uri
    assert proto.uri == target_uri
    assert proto.program.contents.decode("utf-8") == "e"
def test_empty_dataset(empty_dataset: FilesDataset):
    """An empty dataset reports size zero and yields nothing."""
    assert empty_dataset.size == 0

    uris = list(empty_dataset.benchmark_uris())
    assert uris == []

    benchmarks = list(empty_dataset.benchmarks())
    assert benchmarks == []