示例#1
0
def test_dataset_map(with_none):
    """Check batched ``map`` into an ``Array3D`` column, optionally with a ``None`` row.

    A two-row dataset is mapped to a single ``image`` column whose feature is
    declared as ``Array3D(dtype="int32", shape=(3, 3, 3))``. The result is then
    read back in numpy format and every row is checked to be a (3, 3, 3)
    ``np.ndarray``.

    Args:
        with_none: When truthy, the first image emitted by the mapping function
            is replaced by ``None``, and that row is expected to come back as an
            array filled with NaN.
    """
    source_ds = datasets.Dataset.from_dict({"path": ["path1", "path2"]})

    def process_data(batch):
        # The same 3x3x3 payload is emitted for every incoming path; a fresh
        # array is built per row so the rows do not share memory.
        pixels = [
            [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            [[10, 20, 30], [40, 50, 60], [70, 80, 90]],
            [[100, 200, 300], [400, 500, 600], [700, 800, 900]],
        ]
        images = []
        for _ in batch["path"]:
            images.append(np.array(pixels))
        if with_none:
            # Blank out the first row to exercise the missing-value path.
            images[0] = None
        return {"image": images}

    image_feature = Array3D(dtype="int32", shape=(3, 3, 3))
    features = datasets.Features({"image": image_feature})
    processed_ds = ds_mapped = source_ds.map(
        process_data,
        batched=True,
        remove_columns=source_ds.column_names,
        features=features,
    )
    # Two rows, and only the new "image" column survives remove_columns.
    assert ds_mapped.shape == (2, 1)
    with processed_ds.with_format("numpy") as numpy_view:
        for row_index, row in enumerate(numpy_view):
            assert "image" in row
            image = row["image"]
            assert isinstance(image, np.ndarray)
            assert image.shape == (3, 3, 3)
            if with_none and row_index == 0:
                assert np.all(np.isnan(image))
示例#2
0
def test_dataset_map():
    """Check that a batched ``map`` can populate an ``Array3D`` column.

    A two-row dataset is mapped to a single ``image`` column declared as
    ``Array3D(dtype="int32", shape=(3, 3, 3))``; each row read back in numpy
    format must expose an ``image`` key holding an ``np.ndarray``.
    """
    source_ds = datasets.Dataset.from_dict({"path": ["path1", "path2"]})

    def process_data(batch):
        # Emit one identical 3x3x3 array per incoming path.
        pixels = [
            [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            [[10, 20, 30], [40, 50, 60], [70, 80, 90]],
            [[100, 200, 300], [400, 500, 600], [700, 800, 900]],
        ]
        images = []
        for _ in batch["path"]:
            images.append(np.array(pixels))
        return {"image": images}

    image_feature = Array3D(dtype="int32", shape=(3, 3, 3))
    features = datasets.Features({"image": image_feature})
    processed_ds = source_ds.map(
        process_data,
        batched=True,
        remove_columns=source_ds.column_names,
        features=features,
    )
    # Two rows, and only the new "image" column survives remove_columns.
    assert processed_ds.shape == (2, 1)
    with processed_ds.with_format("numpy") as numpy_view:
        for row in numpy_view:
            assert "image" in row
            assert isinstance(row["image"], np.ndarray)
示例#3
0
 def get_two_col_datasset(self, first_dim_list, fixed_shape):
     """Build a dataset with a variable-first-dimension image column and a text column.

     Args:
         first_dim_list: Iterable of sizes; one row is created per entry, and
             the entry is used as the first dimension of that row's array.
         fixed_shape: Remaining dimensions shared by every array; unpacked
             after the first dimension (presumably two values, since the
             feature is an ``Array3D`` -- confirm against callers).

     Returns:
         A ``datasets.Dataset`` with an ``image`` column of random float32
         arrays and a ``text`` column filled with the string ``"text"``.
     """
     # The leading dimension is declared as None so each row may differ in it.
     image_feature = Array3D(shape=(None, *fixed_shape), dtype="float32")
     schema = datasets.Features({"image": image_feature, "text": Value("string")})

     images = []
     for leading_dim in first_dim_list:
         sample = np.random.rand(leading_dim, *fixed_shape)
         images.append(sample.astype("float32"))
     texts = ["text" for _ in first_dim_list]

     columns = {"image": images, "text": texts}
     return datasets.Dataset.from_dict(columns, features=schema)