示例#1
0
 def test_read_wavfile(self):
     """Check the sample rate, length and sample range of the wav file at self.wav_filepath."""
     sample_rate, wav = read_wavfile(self.wav_filepath)
     self.assertEqual(16000, sample_rate)
     self.assertEqual(len(wav), 14118)
     # Signed 16-bit PCM samples span [-32768, 32767]: the positive limit is
     # one less than the magnitude of the negative limit.
     self.assertGreaterEqual(min(wav), -32768)
     self.assertLessEqual(max(wav), 32767)
 def test_preprocess_wav(self):
     """Preprocessing, with and without distortion, must yield 16000 samples bounded by 1 in magnitude."""
     _, raw = read_wavfile(self.wav_filepath)
     distorted = preprocess_wav(raw, distort=True)
     clean = preprocess_wav(raw, distort=False)
     # Both outputs have the fixed clip length
     for processed in (distorted, clean):
         self.assertEqual(16000, len(processed))
     # Both outputs stay within [-1, 1]
     for processed in (distorted, clean):
         self.assertGreaterEqual(1, np.abs(processed).max())
     # Both outputs contain more than 500 distinct sample values
     for processed in (clean, distorted):
         self.assertLess(500, len(set(processed)))
示例#3
0
def load_random_real_noise_clip(data_version: str) -> np.ndarray:
    """
    Loads one randomly chosen noise clip from the _background_noise_ folder and returns it
    :param data_version: specifies the version of the data to use (str {"0.01", "0.02"})
    :return: samples of the chosen noise clip, as returned by read_wavfile (np.ndarray)
    :raises IndexError: if the folder contains no ".wav" file (raised by random.choice)
    """
    path = os.path.join(get_training_data_path(data_version=data_version), "_background_noise_")
    # The folder may hold other files; only ".wav" entries are candidates
    wav_filenames = [name for name in os.listdir(path) if name.endswith(".wav")]
    # Only the samples are needed; the sample rate returned by read_wavfile is discarded
    _, clip = read_wavfile(os.path.join(path, random.choice(wav_filenames)))
    return clip
示例#4
0
 def test_resample_wavfile(self):
     """Resampling must keep the clip length for every factor tested.

     Factor 1 is expected to preserve the sample sum, factor 2 to raise the
     standard deviation and factor 0.5 to lower it.
     """
     _, original = read_wavfile(self.wav_filepath)
     n_samples = len(original)
     # Factor 1: same length and (almost) the same sum
     unchanged = resample_wavfile(wav=original, factor=1)
     self.assertEqual(n_samples, len(unchanged))
     self.assertAlmostEqual(original.sum(), unchanged.sum(), delta=0.00001)
     # Factor 2: same length, larger spread
     doubled = resample_wavfile(wav=original, factor=2)
     self.assertEqual(n_samples, len(doubled))
     self.assertLess(np.std(original), np.std(doubled))
     # Factor 0.5: same length, smaller spread
     halved = resample_wavfile(wav=original, factor=0.5)
     self.assertEqual(n_samples, len(halved))
     self.assertGreater(np.std(original), np.std(halved))
示例#5
0
def load_real_noise_clips(data_version: str) -> list:
    """
    Loads all the available real noise clips, which are located under the _background_noise_ folder
    :param data_version: specifies the version of the data to use (str {"0.01", "0.02"})
    :return: list with the samples of every ".wav" file found, in os.listdir order (list of np.ndarray)
    """
    path = os.path.join(get_training_data_path(data_version=data_version), "_background_noise_")
    # read_wavfile returns (sample_rate, samples); only the samples are kept
    return [
        read_wavfile(os.path.join(path, filename))[1]
        for filename in os.listdir(path)
        if filename.endswith(".wav")
    ]
示例#6
0
 def test_saturate_wavfile(self):
     """Check saturate_wavfile on a peak-normalized clip for factors 1, 2 and 0.2."""
     _, raw = read_wavfile(self.wav_filepath)
     reference = normalize_wavfile(raw, normalize_to_peak=True)
     # Factor 1: output is sample-for-sample identical to the input
     identity = saturate_wavfile(reference, 1)
     self.assertListEqual(reference.tolist(), identity.tolist())
     # Factor 2 (attenuate): similar mean, smaller spread, smaller peak
     attenuated = saturate_wavfile(reference, 2)
     self.assertAlmostEqual(reference.mean(), attenuated.mean(), delta=0.05)
     self.assertGreater(np.std(reference), np.std(attenuated))
     self.assertGreater(max(abs(reference)), max(abs(attenuated)))
     # Factor 0.2 (saturate): similar mean, larger spread, same peak
     saturated = saturate_wavfile(reference, 0.2)
     self.assertAlmostEqual(reference.mean(), saturated.mean(), delta=0.05)
     self.assertLess(np.std(reference), np.std(saturated))
     self.assertEqual(max(abs(reference)), max(abs(saturated)))
 def test_generate_augmented_wav(self):
     """generate_augmented_wav must write a 16000-sample clip bounded by 1 in magnitude.

     The generated "testfolder" directory is removed afterwards, also when an
     assertion fails, so a failing run does not leave artifacts on disk.
     """
     filepath_augmented = os.path.join(
         get_augmented_data_path(data_version=self.data_version),
         "testfolder", "examples", "testaudio.wav")
     # Two levels above the file is the "testfolder" directory created for this test
     folder_augmented = os.path.dirname(os.path.dirname(filepath_augmented))
     # Start from a clean state so the existence check below is meaningful
     if os.path.exists(filepath_augmented):
         os.remove(filepath_augmented)
     try:
         generate_augmented_wav(data_version=self.data_version,
                                filepath=self.wav_filepath,
                                folder="testfolder")
         self.assertTrue(os.path.exists(filepath_augmented))
         _, wav = read_wavfile(filepath_augmented)
         self.assertEqual(16000, len(wav))
         self.assertGreaterEqual(1, np.abs(wav).max())
     finally:
         # Removing the folder also removes the generated file. The guard keeps
         # a missing folder from masking the original assertion failure.
         if os.path.isdir(folder_augmented):
             shutil.rmtree(folder_augmented)
示例#8
0
 def test_fix_wavfile_length(self):
     """fix_wavfile_length must cut to a shorter length and zero-pad both ends for a longer one."""
     _, original = read_wavfile(self.wav_filepath)
     # Cut: exact target length and a mean close to the original
     shortened = fix_wavfile_length(original, 8000)
     self.assertEqual(8000, len(shortened))
     self.assertAlmostEqual(shortened.mean(), original.mean(), delta=0.05)
     # Pad, for an even and an odd target length: the sum is preserved and
     # the first and last 1000 samples are zeros
     silence = [0] * 1000
     for target_length in (30000, 30001):
         padded = fix_wavfile_length(original, target_length)
         self.assertEqual(target_length, len(padded))
         self.assertEqual(padded.sum(), original.sum())
         self.assertListEqual(silence, padded[:1000].tolist())
         self.assertListEqual(silence, padded[-1000:].tolist())
示例#9
0
    def test_fix_wavfile_duration(self):
        """fix_wavfile_duration must keep, shorten or zero-pad the clip to match the requested duration."""
        sample_rate, original = read_wavfile(self.wav_filepath)

        # Requesting the clip's own duration returns the same samples
        own_duration = len(original) / sample_rate
        same = fix_wavfile_duration(wav=original, sample_rate=sample_rate, duration=own_duration)
        self.assertListEqual(list(original), list(same))

        # A shorter duration produces int(duration * sample_rate) samples
        shortened = fix_wavfile_duration(wav=original, sample_rate=sample_rate, duration=0.3)
        self.assertEqual(len(shortened), int(0.3 * sample_rate))

        # A longer duration pads with zeros at both ends and preserves the sum
        padded = fix_wavfile_duration(wav=original, sample_rate=sample_rate, duration=3)
        self.assertEqual(len(padded), int(3 * sample_rate))
        silence = [0] * 16000
        self.assertListEqual(silence, list(padded[:16000]))
        self.assertListEqual(silence, list(padded[-16000:]))
        self.assertEqual(np.sum(original), np.sum(padded))
示例#10
0
def generate_augmented_wav(data_version: str, filepath: str, folder: str) -> None:
    """
    Given a filepath of a wav file, loads it, preprocesses it with distortion and stores it in the folder
    for augmentations, under a sub-folder named after the directory that contains the input file.
    Failures are reported on stdout and do not propagate (best effort).
    :param data_version: specifies the version of the data to use (str {"0.01", "0.02"})
    :param filepath:  file path of an existing wav file (str)
    :param folder: Name of the folder which will contain this version of the augmentation (str)
    :return: None (void)
    """
    try:
        # The class sub-folder is the name of the directory that directly contains the file
        folder_class = os.path.basename(os.path.dirname(filepath))
        output_path = os.path.join(get_augmented_data_folder(data_version=data_version, folder=folder), folder_class)
        # exist_ok avoids the check-then-create race when several files of the same class are processed at once
        os.makedirs(output_path, exist_ok=True)
        sample_rate, wav = read_wavfile(filepath)
        wav = preprocess_wav(wav, distort=True)
        # The augmented copy keeps the original file name
        output_filepath = os.path.join(output_path, os.path.basename(filepath))
        wavfile.write(output_filepath, sample_rate, wav)
    except Exception as error:
        # Catch Exception rather than everything, so KeyboardInterrupt and SystemExit still stop the program;
        # the cause is included so that failures can be diagnosed
        print("Error found with file {}: {!r}".format(filepath, error))
示例#11
0
 def test_time_offset(self):
     """Check time_offset_wavfile for no shift, full shifts and half shifts in both directions."""
     _, original = read_wavfile(self.wav_filepath)
     half = int(len(original) / 2)
     # No shift: same length and (almost) the same sum
     unshifted = time_offset_wavfile(wav=original, shift_factor=0)
     self.assertEqual(len(original), len(unshifted))
     self.assertAlmostEqual(original.sum(), unshifted.sum(), delta=0.00001)
     # Full shift, forward then inverse: nothing but zeros remains
     for full_shift in (1, -1):
         emptied = time_offset_wavfile(wav=original, shift_factor=full_shift)
         self.assertEqual(np.abs(emptied).sum(), 0)
     # Half shift: the first half of the input ends up at the end of the output
     forward = time_offset_wavfile(wav=original, shift_factor=0.5)
     self.assertEqual(len(original), len(forward))
     self.assertListEqual(original.tolist()[:half],
                          forward.tolist()[-half:])
     # Half shift inverse: the last half of the input ends up at the start of the output
     backward = time_offset_wavfile(wav=original, shift_factor=-0.5)
     self.assertEqual(len(original), len(backward))
     self.assertListEqual(original.tolist()[-half:],
                          backward.tolist()[:half])
示例#12
0
 def load_data(self, file_paths: list, add_noise: bool, load_targets: bool = True) -> None:
     """
     Loads and preprocesses (without distortion) the given wav files into self.audios and, optionally,
     appends extra "silence" clips.
     Paths that do not exist or fail to load are appended to self.corrupted_file_paths and reported on stdout.
     :param file_paths: paths of the wav files to load (list of str)
     :param add_noise: if True, appends real noise, white noise and all-zero clips, sized as 15%, 5% and
     0.25% of the number of clips loaded from file_paths (bool)
     :param load_targets: if True, self.targets is reset and filled with one label per clip: the name of the
     folder containing the file, or "silence" for the added clips (bool)
     :return: None (void)
     """
     self.audios = []
     if load_targets:
         self.targets = []
     # Load data
     for file_path in tqdm(file_paths):
         if not os.path.exists(file_path):
             self.corrupted_file_paths.append(file_path)
             print(f"Fatal error, file {file_path} not found")
             continue
         try:
             _, wav = read_wavfile(file_path)
             wav = preprocess_wav(wav, distort=False)
         except Exception:
             # Catch Exception rather than everything, so KeyboardInterrupt still aborts a long load
             self.corrupted_file_paths.append(file_path)
             print(f"Error reading {file_path}")
             continue
         self.audios.append(wav)
         if load_targets:
             # The label is the name of the directory that directly contains the file
             target = os.path.split(os.path.split(file_path)[0])[1]
             self.targets.append(target)
     if not add_noise:
         return
     # All three counts are relative to the number of clips loaded from disk,
     # so they are fixed before any noise clip is appended
     n_loaded = len(self.audios)
     n_artificial_noise_samples = int(0.05 * n_loaded)
     n_real_noise_samples = int(0.15 * n_loaded)
     n_empty_samples = int(0.0025 * n_loaded)
     for _ in tqdm(range(n_real_noise_samples)):
         wav = get_random_real_noise_subclip(data_version=self.data_version, n_samples=16000,
                                             noise_clips=self.noise_clips)
         wav = preprocess_wav(wav, distort=False)
         self.audios.append(wav)
         if load_targets:
             self.targets.append("silence")
     for _ in range(n_artificial_noise_samples):
         wav = generate_white_noise_clip(16000)
         wav = preprocess_wav(wav, distort=False)
         self.audios.append(wav)
         if load_targets:
             self.targets.append("silence")
     if n_empty_samples:
         # Shape the all-zero clips after the most recently stored clip instead of
         # relying on a variable left over from the loops above
         reference_clip = self.audios[-1]
         for _ in range(n_empty_samples):
             self.audios.append(np.zeros_like(reference_clip).astype(np.float32))
             if load_targets:
                 self.targets.append("silence")
示例#13
0
    def test_pitch_shift(self):
        """Check pitch_shift_wavfile for shifts of 0, +0.5 and -0.5 octaves.

        A non-zero shift must keep the length and the peak amplitude while
        moving the strongest frequency bin up or down respectively.
        """
        def dominant_bin(signal):
            # Index of the largest magnitude within the first half of the FFT
            spectrum = np.fft.fft(signal)
            spectrum = spectrum[:int(len(spectrum) / 2)]
            return np.argmax(np.abs(spectrum))

        sample_rate, original = read_wavfile(self.wav_filepath)

        # No shift: same length and (almost) the same sum
        unshifted = pitch_shift_wavfile(wav=original, sr=sample_rate, n_octaves=0)
        self.assertEqual(len(original), len(unshifted))
        self.assertAlmostEqual(original.sum(), unshifted.sum(), delta=0.00001)

        # Shift up: the dominant frequency bin moves to a higher index
        raised = pitch_shift_wavfile(wav=original, sr=sample_rate, n_octaves=0.5)
        self.assertEqual(len(original), len(raised))
        self.assertLess(dominant_bin(original), dominant_bin(raised))
        # Check peak
        self.assertEqual(np.max(np.abs(original)), np.max(np.abs(raised)))

        # Shift down: the dominant frequency bin moves to a lower index
        lowered = pitch_shift_wavfile(wav=original, sr=sample_rate, n_octaves=-0.5)
        self.assertEqual(len(original), len(lowered))
        self.assertGreater(dominant_bin(original), dominant_bin(lowered))
        # Check peak
        self.assertEqual(np.max(np.abs(original)), np.max(np.abs(lowered)))
示例#14
0
    def test_add_noise(self):
        """Check add_noise_to_wavfile with amplitude factors 0 and 0.5, with and without clipping.

        Uses assertEqual / assertNotEqual: the assertEquals / assertNotEquals
        aliases are deprecated and no longer exist in Python 3.12.
        """
        sample_rate, wav = read_wavfile(self.wav_filepath)
        # Amplitude factor 0: the signal is unchanged whether or not clipping is requested
        wav_processed = add_noise_to_wavfile(wav=wav,
                                             amplitude_factor=0,
                                             clip_to_original_range=False)
        self.assertEqual(len(wav), len(wav_processed))
        self.assertAlmostEqual(wav.sum(), wav_processed.sum(), delta=0.00001)
        wav_processed = add_noise_to_wavfile(wav=wav,
                                             amplitude_factor=0,
                                             clip_to_original_range=True)
        self.assertEqual(len(wav), len(wav_processed))
        self.assertAlmostEqual(wav.sum(), wav_processed.sum(), delta=0.00001)

        # Amplitude factor 0.5 without clipping: the peak changes and the spread grows
        wav_processed = add_noise_to_wavfile(wav=wav,
                                             amplitude_factor=0.5,
                                             clip_to_original_range=False)
        self.assertEqual(len(wav), len(wav_processed))
        self.assertNotEqual(np.abs(wav).max(), np.abs(wav_processed).max())
        self.assertLess(np.std(wav), np.std(wav_processed))
        # Amplitude factor 0.5 with clipping: the peak equals the original peak
        wav_processed = add_noise_to_wavfile(wav=wav,
                                             amplitude_factor=0.5,
                                             clip_to_original_range=True)
        self.assertEqual(len(wav), len(wav_processed))
        self.assertEqual(np.abs(wav).max(), np.abs(wav_processed).max())
示例#15
0
 def test_normalize_wavfile_formats(self):
     """Check that normalize_wavfile agrees across four rescaled copies of the same clip.

     Also covers peak normalization bounds, an all-zero input and inputs
     containing NaN, which must raise ValueError.
     """
     sample_rate, wav = read_wavfile(self.wav_filepath)
     # Four copies of the clip at different scales; each is passed to
     # normalize_wavfile with a matching second argument (0 to 3)
     # NOTE(review): the second argument looks like a format identifier - confirm against normalize_wavfile
     wav_0 = wav / 32768
     wav_1 = wav_0 * 2147483648
     wav_2 = wav
     wav_3 = 255 * (wav_0 + 1) / 2
     wav_0_processed = normalize_wavfile(wav_0, 0)
     wav_1_processed = normalize_wavfile(wav_1, 1)
     wav_2_processed = normalize_wavfile(wav_2, 2)
     wav_3_processed = normalize_wavfile(wav_3, 3)
     wavs_peak_processed = [
         normalize_wavfile(wav, normalize_to_peak=True)
         for wav in [wav_0, wav_1, wav_2, wav_3]
     ]
     # All four copies must normalize to the same extremes
     self.assertTrue(
         max(wav_0_processed) == max(wav_1_processed) == max(
             wav_2_processed) == max(wav_3_processed))
     self.assertTrue(
         min(wav_0_processed) == min(wav_1_processed) == min(
             wav_2_processed) == min(wav_3_processed))
     self.assertLessEqual(max(wav_0_processed), 1)
     self.assertAlmostEqual(max(wav_0_processed), 1, delta=0.75)
     self.assertGreaterEqual(min(wav_0_processed), -1)
     self.assertAlmostEqual(min(wav_0_processed), -1, delta=0.75)
     for wav_processed in wavs_peak_processed:
         self.assertGreaterEqual(1, wav_processed.max())
         # The lower bound applies to the minimum; comparing the maximum
         # against -1 would leave the negative side unchecked
         self.assertLessEqual(-1, wav_processed.min())
     # An all-zero clip stays all-zero
     wav_processed = normalize_wavfile(np.zeros(16000),
                                       normalize_to_peak=True)
     self.assertEqual(0, np.abs(wav_processed).sum())
     # Fixtures are built outside assertRaises so that only the call under
     # test can satisfy the expected ValueError
     wav_corrupted = np.random.rand(16000)
     wav_corrupted[4000] = np.nan
     with self.assertRaises(ValueError):
         normalize_wavfile(wav_corrupted, normalize_to_peak=True)
     wav_all_nan = np.ones(16000) * np.nan
     with self.assertRaises(ValueError):
         normalize_wavfile(wav_all_nan, normalize_to_peak=True)
示例#16
0
 def test_normalize_wavfile_to_peaks(self):
     """Peak normalization must keep samples within [-1, 1] and reach magnitude 1 at the peak."""
     _, raw = read_wavfile(self.wav_filepath)
     normalized = normalize_wavfile(raw, normalize_to_peak=True)
     lowest = min(normalized)
     self.assertGreaterEqual(lowest, -1)
     highest = max(normalized)
     self.assertLessEqual(highest, 1)
     # The larger of the two extremes, in magnitude, must be exactly 1
     self.assertEqual(max(max(normalized), -min(normalized)), 1)
示例#17
0
 def test_draw_random_subclip(self):
     """A random subclip must have the requested length and contain at least one non-zero sample."""
     _, source = read_wavfile(self.wav_filepath)
     excerpt = draw_random_subclip(source, 1000)
     self.assertEqual(1000, len(excerpt))
     # The maximum of the boolean mask is True when any sample is non-zero,
     # and True compares greater than 0.5
     non_zero_mask = np.array(excerpt) != 0
     self.assertLess(0.5, np.max(non_zero_mask))
示例#18
0
 def test_randomly_distort_wavfile(self):
     """Random distortion must not change the number of samples."""
     rate, original = read_wavfile(self.wav_filepath)
     distorted = randomly_distort_wavfile(wav=original, sr=rate)
     expected_length = len(original)
     self.assertEqual(expected_length, len(distorted))