Пример #1
0
def test_ESPnetDataset_sound_scp(sound_scp):
    """Check the "data1" shapes of keys "a" and "b" for the "sound" type."""
    entries = [(sound_scp, "data1", "sound")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)
    # Exercise the string representation and the names() accessor as well.
    print(dataset)
    print(dataset.names())

    for key, expected_shape in (("a", (160000,)), ("b", (80000,))):
        _, sample = dataset[key]
        assert sample["data1"].shape == expected_shape
Пример #2
0
def test_ESPnetDataset_pipe_wav(pipe_wav):
    """Check the "data1" shapes of keys "a" and "b" for the "pipe_wav" type."""
    entries = [(pipe_wav, "data1", "pipe_wav")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    for key, expected_shape in (("a", (160000,)), ("b", (80000,))):
        _, sample = dataset[key]
        assert sample["data1"].shape == expected_shape
Пример #3
0
def test_ESPnetDataset_feats_scp(feats_scp):
    """Check the "data2" shapes of keys "a" and "b" for the "kaldi_ark" type."""
    entries = [(feats_scp, "data2", "kaldi_ark")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    for key, expected_shape in (("a", (100, 80)), ("b", (150, 80))):
        _, sample = dataset[key]
        assert sample["data2"].shape == expected_shape
Пример #4
0
def test_ESPnetDataset_csv_float(csv_float):
    """Check the float32 values loaded for keys "a" and "b" via "csv_float".

    The comparison uses ``np.testing.assert_array_equal`` rather than the
    builtin ``all`` over an element-wise ``==``: it rejects mismatched shapes
    instead of broadcasting, and it reports the differing elements on failure.
    """
    dataset = ESPnetDataset(
        path_name_type_list=[(csv_float, "data8", "csv_float")],
        preprocess=preprocess,
    )

    expected_values = (
        ("a", np.array([1.4, 3.4], dtype=np.float32)),
        ("b", np.array([0.9, 9.3], dtype=np.float32)),
    )
    for key, expected in expected_values:
        _, data = dataset[key]
        np.testing.assert_array_equal(data["data8"], expected)
Пример #5
0
def test_ESPnetDataset_csv_int(csv_int):
    """Check the integer values loaded for keys "a" and "b" via "csv_int"."""
    entries = [(csv_int, "data8", "csv_int")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    for key, expected in (("a", (0, 1, 2)), ("b", (2, 3, 4))):
        _, sample = dataset[key]
        assert tuple(sample["data8"]) == expected
Пример #6
0
def test_ESPnetDataset_text(text):
    """Check the "data7" values returned for keys "a" and "b" via "text"."""
    entries = [(text, "data7", "text")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    for key, expected in (("a", (0,)), ("b", (1,))):
        _, sample = dataset[key]
        assert tuple(sample["data7"]) == expected
Пример #7
0
def test_ESPnetDataset_h5file_1(h5file_1):
    """Check the "data4" shapes of keys "a" and "b" for the "hdf5" type."""
    entries = [(h5file_1, "data4", "hdf5")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    for key, expected_shape in (("a", (100, 80)), ("b", (150, 80))):
        _, sample = dataset[key]
        assert sample["data4"].shape == expected_shape
Пример #8
0
def test_ESPnetDataset_npy_scp(npy_scp):
    """Check the "data3" shapes of keys "a" and "b" for the "npy" type."""
    entries = [(npy_scp, "data3", "npy")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    for key, expected_shape in (("a", (100, 80)), ("b", (150, 80))):
        _, sample = dataset[key]
        assert sample["data3"].shape == expected_shape
Пример #9
0
def test_ESPnetDataset_rand_int(shape_file):
    """Check the "data6" shapes of keys "a" and "b" for "rand_int_0_10"."""
    entries = [(shape_file, "data6", "rand_int_0_10")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    for key, expected_shape in (("a", (100, 80)), ("b", (150, 80))):
        _, sample = dataset[key]
        assert sample["data6"].shape == expected_shape
Пример #10
0
def test_ESPnetDataset_h5file_2(h5file_2):
    """Check the "data1_input" / "data1_target" shapes for keys "a" and "b"."""
    entries = [(h5file_2, "data1", "hdf5")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    # (key, expected input shape, expected target shape)
    cases = (
        ("a", (100, 80), (10,)),
        ("b", (150, 80), (13,)),
    )
    for key, input_shape, target_shape in cases:
        _, sample = dataset[key]
        assert sample["data1_input"].shape == input_shape
        assert sample["data1_target"].shape == target_shape
Пример #11
0
    def __init__(
        self,
        path_name_type_list: Collection[Tuple[str, str, str]],
        preprocess: Callable[
            [str, Dict[str, np.ndarray]], Dict[str, np.ndarray]
        ] = None,
        float_dtype: str = "float32",
        int_dtype: str = "long",
        key_file: str = None,
    ):
        """Register the data sources described by ``path_name_type_list``.

        Each element is a ``(path, name, type)`` triple. Triples whose type is
        listed in ``DATA_TYPES`` are kept in ``self.path_name_type_list``; the
        remaining ones are handed to a nested ``ESPnetDataset`` stored in
        ``self.non_iterable_dataset`` (``None`` when there are none).

        Args:
            path_name_type_list: Non-empty collection of (path, name, type).
            preprocess: Optional callable stored on the instance and forwarded
                to the nested ``ESPnetDataset``.
            float_dtype: Stored as-is and forwarded to the nested dataset.
            int_dtype: Stored as-is and forwarded to the nested dataset.
            key_file: Stored as-is on the instance.

        Raises:
            ValueError: If ``path_name_type_list`` is empty.
            RuntimeError: If the same name appears in more than one triple.
        """
        assert check_argument_types()
        if len(path_name_type_list) == 0:
            raise ValueError(
                '1 or more elements are required for "path_name_type_list"'
            )

        # Work on a private copy so the caller's collection is never shared.
        entries = copy.deepcopy(path_name_type_list)

        self.preprocess = preprocess
        self.float_dtype = float_dtype
        self.int_dtype = int_dtype
        self.key_file = key_file

        self.debug_info = {}
        self.path_name_type_list = []
        delegated = []

        for path, name, _type in entries:
            if name in self.debug_info:
                raise RuntimeError(f'"{name}" is duplicated for data-key')
            self.debug_info[name] = path, _type

            triple = (path, name, _type)
            if _type in DATA_TYPES:
                self.path_name_type_list.append(triple)
            else:
                delegated.append(triple)

        if not delegated:
            self.non_iterable_dataset = None
        else:
            # Some types don't support iterable mode, so they are served
            # through a regular ESPnetDataset instead.
            self.non_iterable_dataset = ESPnetDataset(
                path_name_type_list=delegated,
                preprocess=preprocess,
                float_dtype=float_dtype,
                int_dtype=int_dtype,
            )

        # The flag is set when an "utt2category" file sits in the same
        # directory as the first entry's path.
        first_path = entries[0][0]
        self.apply_utt2category = Path(
            Path(first_path).parent, "utt2category"
        ).exists()
Пример #12
0
def test_ESPnetDataset_imagefolder(imagefolder):
    """Check "data1_0" shape and "data1_1" value at indices 0 and 1.

    The test is skipped when torchvision cannot be imported.
    """
    pytest.importorskip("torchvision")

    entries = [(imagefolder, "data1", "imagefolder_32x32")]
    dataset = ESPnetDataset(path_name_type_list=entries, preprocess=preprocess)

    # The expected "data1_1" value is the one-element tuple of the index.
    for index in (0, 1):
        _, sample = dataset[index]
        assert sample["data1_0"].shape == (3, 32, 32)
        assert sample["data1_1"] == (index,)