def __init__(self,
                 data_path,
                 csv_local_path,
                 sr=8000,
                 is_train=True,
                 signal_length=2**16,
                 mix=False,
                 precision=np.float32,
                 n_files=None,
                 upsample_factor=1):

        self.signal_length = signal_length

        if is_train:
            self.transform = get_train_transform(length=signal_length)
        else:
            self.transform = get_test_transform(length=signal_length)

        self.sr = sr
        self.mix = mix
        self.precision = precision
        self.n_files = n_files
        self.upsample_factor = upsample_factor

        df = pd.read_csv(data_path + csv_local_path)

        if self.n_files is not None:
            df = df.sample(self.n_files)

        df['file_name'] = df['file_name'].apply(lambda x: '{}/{}'.format(
            data_path, '/'.join(
                ['callCenterDataset',
                 x.split('callCenterDataset/')[1]])))

        self.X = []
        self.y = []

        for idx, row in df.iterrows():
            v_start, v_end = row['v_start'], row['v_end']
            file_name = row['file_name']

            signal, sr = librosa.load(file_name,
                                      sr=self.sr,
                                      res_type='kaiser_fast')
            # keep only the annotated voice segment; it must be non-empty
            segment = signal[int(v_start * sr):int(v_end * sr)]
            assert len(segment) > 0

            self.X.append(segment)
            self.y.append(int(row['is_human']))

        self.n_classes = len(set(self.y))
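
A minimal usage sketch for the constructor above (assumptions: the class name `CallCenterDataset` and the paths are invented for illustration; the snippet only shows that the CSV must provide `file_name`, `v_start`, `v_end` and `is_human` columns):

# Hypothetical usage; class name and paths are placeholders, not from the source.
dataset = CallCenterDataset(data_path='/data',
                            csv_local_path='/meta/train.csv',
                            sr=8000,
                            is_train=True,
                            n_files=100)  # subsample 100 rows for a quick smoke test
print(dataset.n_classes, len(dataset.X))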
Example no. 2
    def __init__(self,
                 root_dir,
                 sr=16000,
                 precision=np.float32,
                 is_train=True,
                 seed=42,
                 n_jobs=8,
                 signal_length=2**16,
                 verbose=0):
        self.root_dir = root_dir
        # recursive=True so the '**' pattern actually descends into subdirectories
        self.file_list = sorted(
            glob.glob(self.root_dir + '/**/*.*', recursive=True))
        self.X = []
        self.y = []
        self.is_train = is_train
        self.n_jobs = n_jobs
        self.seed = seed
        self.sr = sr
        self.precision = precision

        self.signal_length = signal_length
        if is_train:
            self.transform = get_train_transform(length=signal_length)
        else:
            self.transform = get_test_transform(length=signal_length)

        # parallel reading cuts the load time from roughly 10 minutes to about 42 seconds
        if verbose:
            iterable = tqdm(self.file_list)
        else:
            iterable = self.file_list
        parres = Parallel(n_jobs=self.n_jobs,
                          verbose=0)(delayed(self.__reader__)(f)
                                     for f in iterable)
        for wave, label in parres:
            self.X.append(wave)
            self.y.append(label)
        self.X = np.array(self.X)
        self.y = np.array(self.y)
        self.le = LabelsToOneHot(self.y)
        assert len(self.X) == len(self.y)
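
The parallel loop above relies on `self.__reader__`, which is not part of this snippet. A minimal sketch of such a reader, assuming `librosa` and `os` are imported and that the class label comes from the file's parent directory name (both are assumptions, not shown in the source):

    # Hypothetical helper; the real __reader__ is not shown in this example.
    def __reader__(self, file_name):
        wave, _ = librosa.load(file_name, sr=self.sr, res_type='kaiser_fast')
        # assumption: the class label is the parent directory name
        label = os.path.basename(os.path.dirname(file_name))
        return wave.astype(self.precision), label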
Example no. 3
    def __init__(self,
                 data_path,
                 dataset_name,
                 sr,
                 exclude,
                 is_train=True,
                 signal_length=2**16,
                 mix=False,
                 precision=np.float32):

        self.signal_length = signal_length

        if is_train:
            self.transform = get_train_transform(length=signal_length)
        else:
            self.transform = get_test_transform(length=signal_length)

        self.sr = sr
        self.mix = mix
        self.precision = precision
        data_set = np.load(
            os.path.join(data_path, dataset_name,
                         'wav{}.npz'.format(sr // 1000)))

        self.X = []
        self.y = []
        for fold_name in data_set.keys():
            # keys look like 'fold<N>'; skip any fold listed in `exclude`
            if int(fold_name[4:]) in exclude:
                continue

            fold = data_set[fold_name].item()
            sounds = fold['sounds']
            labels = fold['labels']

            self.X.extend(sounds)
            self.y.extend(labels)

        self.n_classes = len(set(self.y))
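
The `exclude` argument makes leave-some-folds-out splits straightforward. A hedged sketch of how train and validation sets might be built; the class name `FoldedNpzDataset`, the dataset name and the 1-10 fold range are all assumed for illustration:

# Hypothetical usage; class name, dataset name and fold count are placeholders.
val_fold = 5
train_set = FoldedNpzDataset(data_path, 'some_dataset', sr=16000,
                             exclude=[val_fold], is_train=True)
val_set = FoldedNpzDataset(data_path, 'some_dataset', sr=16000,
                           exclude=[f for f in range(1, 11) if f != val_fold],
                           is_train=False)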
Example no. 4
    def __len__(self):
        return self.n

    def __getitem__(self, index):
        X, y, label_name = self.X[index], self.y[index], self.label_name[index]

        if self.transforms:
            X = tensor_to_numpy(self.transforms(X.reshape((1, -1, 1))))

        if self.one_hot_labels:
            y = self.one_hot_encoder(y)[0, :]

        return {"sound": X, "class": y, "class_label": label_name}


if __name__ == "__main__":
    dataset = GTANZDataset("../genres16_test.npz",
                           transforms=get_train_transform(length=2**14),
                           one_hot_labels=True)
    print(len(dataset))
    print(dataset[5])

    params = {'batch_size': 64, 'shuffle': True, 'num_workers': 1}
    dataset = GTANZDataset("../genres16_test.npz",
                           transforms=get_test_transform(length=2**14),
                           one_hot_labels=True)
    test_generator = ValidationDataLoader(dataset, **params)
    for batch in test_generator:
        print(batch['sound'].shape)
        print(batch)
        break
Example no. 5
            yield result


if __name__ == "__main__":
    from misc.transforms import get_train_transform, get_test_transform
    from librispeech.torch_readers.dataset_tfrecord import TFRecordDataset

    params = {'batch_size': 64, 'shuffle': False, 'num_workers': 1}

    dataset = TFRecordDataset("../librispeach/test-clean-100_wav16.tfrecord",
                              get_train_transform(16000),
                              16000,
                              in_memory=False)
    test_generator = LibriSpeechTFRecordDataLoader(dataset, **params)
    for batch in test_generator:
        print(batch['sound'].shape)
        print(batch)
        break

    params = {'batch_size': 64, 'shuffle': False, 'num_workers': 1}

    dataset = TFRecordDataset("../librispeach/test-clean-100_wav16.tfrecord",
                              get_test_transform(16000),
                              16000,
                              in_memory=False)
    test_generator = LibriSpeechTFRecordTestDataLoader(dataset, **params)
    for batch in test_generator:
        print(batch['sound'].shape)
        print(batch)
        break