Exemplo n.º 1
0
def test_encode_batch_with_example_with_empty_first_elem():
    """Encode a doubly nested ClassLabel batch whose second example starts with an empty list.

    The expected result replaces each label name with its position in
    ``names`` ("a" -> 0, "b" -> 1) and leaves the empty inner list empty.
    """
    label = ClassLabel(names=["a", "b"])
    features = Features({"x": Sequence(Sequence(label))})

    batch = {
        "x": [
            [["a"], ["b"]],
            [[], ["b"]],  # first inner sequence of this example is empty
        ],
    }
    expected = {"x": [[[0], [1]], [[], [1]]]}

    assert features.encode_batch(batch) == expected
def test_dataset_with_audio_feature_map_is_not_decoded(shared_datadir):
    """Check undecoded iteration before and after ``map`` on a dataset with an Audio column.

    Rows obtained with ``_iter(decoded=False)`` must carry the audio value
    produced by ``features.encode_batch`` both for the original dataset and
    for the dataset returned by a ``map`` call that only edits the text column.
    """
    wav_file = str(shared_datadir / "test_audio_44100.wav")
    raw = {"audio": [wav_file], "text": ["Hello"]}
    features = Features({"audio": Audio(), "text": Value("string")})
    dset = Dataset.from_dict(raw, features=features)

    # Reference value for the single audio entry, as encoded by the features.
    encoded_audio = features.encode_batch(raw)["audio"][0]

    def assert_undecoded_rows(dataset, text):
        # Every row keeps exactly the two columns and the encoded audio value.
        for row in dataset._iter(decoded=False):
            assert row.keys() == {"audio", "text"}
            assert row == {"audio": encoded_audio, "text": text}

    def process_text(example):
        example["text"] += " World!"
        return example

    assert_undecoded_rows(dset, "Hello")
    assert_undecoded_rows(dset.map(process_text), "Hello World!")