def test_read_wavfile(self):
    """Check the sample rate, length and sample range of the reference wav file."""
    sample_rate, wav = read_wavfile(self.wav_filepath)
    self.assertEqual(16000, sample_rate)
    self.assertEqual(len(wav), 14118)
    # Signed 16-bit PCM samples span [-32768, 32767]; the upper bound is
    # one less than the magnitude of the lower bound.
    self.assertGreaterEqual(min(wav), -32768)
    self.assertLessEqual(max(wav), 32767)
def test_preprocess_wav(self):
    """Check length, amplitude bound and sample variety of preprocessed audio, with and without distortion."""
    _, raw = read_wavfile(self.wav_filepath)
    distorted = preprocess_wav(raw, distort=True)
    clean = preprocess_wav(raw, distort=False)

    # Both outputs must contain exactly 16000 samples.
    for result in (distorted, clean):
        self.assertEqual(16000, len(result))

    # Absolute amplitude must not exceed 1.
    for result in (distorted, clean):
        self.assertGreaterEqual(1, np.abs(result).max())

    # More than 500 distinct sample values are expected in each output.
    for result in (clean, distorted):
        self.assertLess(500, len(set(result)))
def load_random_real_noise_clip(data_version: str) -> np.ndarray:
    """
    Loads a random noise clip from the _background_noise_ folder and returns it

    :param data_version: specifies the version of the data to use (str {"0.01", "0.02"})
    :return: real noise clip (np.ndarray)
    :raises IndexError: if the folder holds no ".wav" file (raised by random.choice)
    """
    noise_dir = os.path.join(get_training_data_path(data_version=data_version), "_background_noise_")
    # Only wav files are candidates; anything else in the folder is ignored.
    wav_names = [name for name in os.listdir(noise_dir) if name.endswith(".wav")]
    chosen_name = random.choice(wav_names)
    _, clip = read_wavfile(os.path.join(noise_dir, chosen_name))
    return clip
def test_resample_wavfile(self):
    """Check that resampling keeps the length and shifts the standard deviation as expected."""
    _, original = read_wavfile(self.wav_filepath)
    original_std = np.std(original)

    # Factor 1: length and sum must be preserved.
    unchanged = resample_wavfile(wav=original, factor=1)
    self.assertEqual(len(original), len(unchanged))
    self.assertAlmostEqual(original.sum(), unchanged.sum(), delta=0.00001)

    # Factor 2 is expected to raise the standard deviation, factor 0.5 to lower it;
    # the length stays the same in both cases.
    expectations = ((2, self.assertLess), (0.5, self.assertGreater))
    for factor, compare_std in expectations:
        resampled = resample_wavfile(wav=original, factor=factor)
        self.assertEqual(len(original), len(resampled))
        compare_std(original_std, np.std(resampled))
def load_real_noise_clips(data_version: str) -> list:
    """
    Loads all the available real noise clips, which are located under the _background_noise_ folder

    :param data_version: specifies the version of the data to use (str {"0.01", "0.02"})
    :return: list of real noise clips (list of np.ndarray), one per ".wav" file found;
        empty when the folder holds no ".wav" file
    """
    noise_dir = os.path.join(get_training_data_path(data_version=data_version), "_background_noise_")
    # read_wavfile returns (sample_rate, samples); only the samples are kept.
    return [
        read_wavfile(os.path.join(noise_dir, filename))[1]
        for filename in os.listdir(noise_dir)
        if filename.endswith(".wav")
    ]
def test_saturate_wavfile(self):
    """Check saturate_wavfile for the identity, attenuating and saturating factors."""
    _, raw = read_wavfile(self.wav_filepath)
    reference = normalize_wavfile(raw, normalize_to_peak=True)

    # Factor 1 leaves the signal untouched.
    identity = saturate_wavfile(reference, 1)
    self.assertListEqual(reference.tolist(), identity.tolist())

    # Factor 2 attenuates: similar mean, smaller spread, smaller peak.
    attenuated = saturate_wavfile(reference, 2)
    self.assertAlmostEqual(reference.mean(), attenuated.mean(), delta=0.05)
    self.assertGreater(np.std(reference), np.std(attenuated))
    self.assertGreater(max(abs(reference)), max(abs(attenuated)))

    # Factor 0.2 saturates: similar mean, larger spread, same peak.
    saturated = saturate_wavfile(reference, 0.2)
    self.assertAlmostEqual(reference.mean(), saturated.mean(), delta=0.05)
    self.assertLess(np.std(reference), np.std(saturated))
    self.assertEqual(max(abs(reference)), max(abs(saturated)))
def test_generate_augmented_wav(self):
    """Generate an augmented copy of the reference wav and check it was written correctly.

    The output folder is removed in a ``finally`` clause so that a failing
    assertion does not leave generated files behind.
    """
    augmented_root = os.path.join(
        get_augmented_data_path(data_version=self.data_version), "testfolder")
    filepath_augmented = os.path.join(augmented_root, "examples", "testaudio.wav")

    # Start from a clean state so the existence check below is meaningful.
    if os.path.exists(filepath_augmented):
        os.remove(filepath_augmented)

    try:
        generate_augmented_wav(data_version=self.data_version,
                               filepath=self.wav_filepath,
                               folder="testfolder")
        self.assertTrue(os.path.exists(filepath_augmented))
        _, wav = read_wavfile(filepath_augmented)
        self.assertEqual(16000, len(wav))
        self.assertGreaterEqual(1, np.abs(wav).max())
    finally:
        # Removing the whole "testfolder" tree also removes the generated file.
        if os.path.exists(augmented_root):
            shutil.rmtree(augmented_root)
def test_fix_wavfile_length(self):
    """Check that fix_wavfile_length cuts long clips and zero-pads short ones."""
    _, original = read_wavfile(self.wav_filepath)

    # Cutting: the result has the requested length and a similar mean.
    shortened = fix_wavfile_length(original, 8000)
    self.assertEqual(8000, len(shortened))
    self.assertAlmostEqual(shortened.mean(), original.mean(), delta=0.05)

    # Padding (even, then odd target length): the sum is preserved and both
    # ends consist of zeros.
    zeros = [0] * 1000
    for target_length in (30000, 30001):
        padded = fix_wavfile_length(original, target_length)
        self.assertEqual(target_length, len(padded))
        self.assertEqual(padded.sum(), original.sum())
        self.assertListEqual(zeros, padded[:1000].tolist())
        self.assertListEqual(zeros, padded[-1000:].tolist())
def test_fix_wavfile_duration(self):
    """Check fix_wavfile_duration for the unchanged, shortened and padded cases."""
    rate, original = read_wavfile(self.wav_filepath)

    # Requesting the clip's own duration must return the same samples.
    own_duration = len(original) / rate
    same = fix_wavfile_duration(wav=original, sample_rate=rate, duration=own_duration)
    self.assertListEqual(list(original), list(same))

    # A shorter duration truncates to duration * sample_rate samples.
    shortened = fix_wavfile_duration(wav=original, sample_rate=rate, duration=0.3)
    self.assertEqual(len(shortened), int(0.3 * rate))

    # A longer duration pads with zeros on both ends and preserves the sum.
    padded = fix_wavfile_duration(wav=original, sample_rate=rate, duration=3)
    self.assertEqual(len(padded), int(3 * rate))
    silence = [0] * 16000
    self.assertListEqual(silence, list(padded[:16000]))
    self.assertListEqual(silence, list(padded[-16000:]))
    self.assertEqual(np.sum(original), np.sum(padded))
def generate_augmented_wav(data_version: str, filepath: str, folder: str) -> None:
    """
    Given a filepath of a wav file, loads it, preprocesses it (with distortion) and stores it
    in the default folder for augmentations, under a sub-folder named after the file's
    parent directory and keeping the original file name.

    Failures are reported on stdout rather than raised, so a batch run can continue with
    the remaining files.

    :param data_version: specifies the version of the data to use (str {"0.01", "0.02"})
    :param filepath: file path of an existing wav file (str)
    :param folder: Name of the folder which will contain this version of the augmentation (str)
    :return: None (void)
    """
    try:
        # The parent directory name of the input file is reused as the output sub-folder.
        folder_class = os.path.basename(os.path.dirname(filepath))
        output_path = os.path.join(
            get_augmented_data_folder(data_version=data_version, folder=folder),
            folder_class)
        # exist_ok avoids the check-then-create race when several files are
        # processed concurrently.
        os.makedirs(output_path, exist_ok=True)

        sample_rate, wav = read_wavfile(filepath)
        wav = preprocess_wav(wav, distort=True)

        output_filepath = os.path.join(output_path, os.path.basename(filepath))
        wavfile.write(output_filepath, sample_rate, wav)
    except Exception as error:
        # Catch Exception (not a bare except) so KeyboardInterrupt / SystemExit
        # still propagate, and report the cause instead of hiding it.
        print("Error found with file {}: {!r}".format(filepath, error))
def test_time_offset(self):
    """Check time_offset_wavfile for zero, full and half shifts in both directions."""
    _, original = read_wavfile(self.wav_filepath)
    half = len(original) // 2

    # No shift: length and sum are preserved.
    shifted = time_offset_wavfile(wav=original, shift_factor=0)
    self.assertEqual(len(original), len(shifted))
    self.assertAlmostEqual(original.sum(), shifted.sum(), delta=0.00001)

    # Full shift, forwards then backwards: nothing but zeros remains.
    for full_shift in (1, -1):
        shifted = time_offset_wavfile(wav=original, shift_factor=full_shift)
        self.assertEqual(np.abs(shifted).sum(), 0)

    # Half shift forwards: the first half of the input ends up at the tail.
    shifted = time_offset_wavfile(wav=original, shift_factor=0.5)
    self.assertEqual(len(original), len(shifted))
    self.assertListEqual(original.tolist()[:half], shifted.tolist()[-half:])

    # Half shift backwards: the last half of the input ends up at the head.
    shifted = time_offset_wavfile(wav=original, shift_factor=-0.5)
    self.assertEqual(len(original), len(shifted))
    self.assertListEqual(original.tolist()[-half:], shifted.tolist()[:half])
def load_data(self, file_paths: list, add_noise: bool, load_targets: bool = True) -> None:
    """
    Load and preprocess (without distortion) the given wav files into ``self.audios`` and,
    optionally, extend the data with "silence" samples.

    Files that are missing or fail to load are appended to ``self.corrupted_file_paths``
    (which must already exist on the instance) and reported on stdout.

    When ``add_noise`` is set, extra samples are appended in proportion to the number of
    successfully loaded files: 15% real-noise sub-clips, 5% white-noise clips and 0.25%
    all-zero clips, all labelled "silence".

    :param file_paths: paths of the wav files to load (list of str)
    :param add_noise: whether to append the extra "silence" samples (bool)
    :param load_targets: whether to (re)build ``self.targets``; the target of a file is the
        name of its parent directory (bool)
    :return: None (void)
    """
    self.audios = []
    if load_targets:
        self.targets = []

    def add_sample(audio, target):
        # Keep audios and targets aligned whenever targets are being collected.
        self.audios.append(audio)
        if load_targets:
            self.targets.append(target)

    # Load data
    for file_path in tqdm(file_paths):
        if not os.path.exists(file_path):
            self.corrupted_file_paths.append(file_path)
            print(f"Fatal error, file {file_path} not found")
            continue
        try:
            _, wav = read_wavfile(file_path)
            wav = preprocess_wav(wav, distort=False)
        except Exception:
            # Exception (not a bare except) so Ctrl-C can still abort a long load.
            self.corrupted_file_paths.append(file_path)
            print(f"Error reading {file_path}")
            continue
        add_sample(wav, os.path.basename(os.path.dirname(file_path)))

    if not add_noise:
        return

    # All three counts are derived from the number of loaded files, before any
    # noise sample is appended.
    n_loaded = len(self.audios)
    n_artificial_noise_samples = int(0.05 * n_loaded)
    n_real_noise_samples = int(0.15 * n_loaded)
    n_empty_samples = int(0.0025 * n_loaded)

    for _ in tqdm(range(n_real_noise_samples)):
        noise = get_random_real_noise_subclip(data_version=self.data_version,
                                              n_samples=16000,
                                              noise_clips=self.noise_clips)
        add_sample(preprocess_wav(noise, distort=False), "silence")

    for _ in range(n_artificial_noise_samples):
        noise = generate_white_noise_clip(16000)
        add_sample(preprocess_wav(noise, distort=False), "silence")

    for _ in range(n_empty_samples):
        # Shape the silent clip after the most recently stored sample instead of
        # relying on a loop variable leaking from the loops above.
        add_sample(np.zeros_like(self.audios[-1], dtype=np.float32), "silence")
def test_pitch_shift(self):
    """Check that pitch shifting keeps length and peak while moving the dominant frequency bin."""
    rate, original = read_wavfile(self.wav_filepath)

    def dominant_bin(signal):
        # Index of the strongest magnitude within the first half of the FFT.
        spectrum = np.fft.fft(signal)
        spectrum = spectrum[:int(len(spectrum) / 2)]
        return np.argmax(np.abs(spectrum))

    # Zero octaves: length and sum are preserved.
    shifted = pitch_shift_wavfile(wav=original, sr=rate, n_octaves=0)
    self.assertEqual(len(original), len(shifted))
    self.assertAlmostEqual(original.sum(), shifted.sum(), delta=0.00001)

    # Shifting up: the fundamental harmonic moves to a higher bin.
    shifted = pitch_shift_wavfile(wav=original, sr=rate, n_octaves=0.5)
    self.assertEqual(len(original), len(shifted))
    self.assertLess(dominant_bin(original), dominant_bin(shifted))
    # The peak amplitude is unchanged.
    self.assertEqual(np.max(np.abs(original)), np.max(np.abs(shifted)))

    # Shifting down: the fundamental harmonic moves to a lower bin.
    shifted = pitch_shift_wavfile(wav=original, sr=rate, n_octaves=-0.5)
    self.assertEqual(len(original), len(shifted))
    self.assertGreater(dominant_bin(original), dominant_bin(shifted))
    # The peak amplitude is unchanged.
    self.assertEqual(np.max(np.abs(original)), np.max(np.abs(shifted)))
def test_add_noise(self):
    """Check add_noise_to_wavfile with zero and non-zero noise, with and without clipping."""
    _, wav = read_wavfile(self.wav_filepath)

    # Zero amplitude: the signal is unchanged whether or not clipping is on.
    for clip in (False, True):
        wav_processed = add_noise_to_wavfile(wav=wav,
                                             amplitude_factor=0,
                                             clip_to_original_range=clip)
        self.assertEqual(len(wav), len(wav_processed))
        self.assertAlmostEqual(wav.sum(), wav_processed.sum(), delta=0.00001)

    # Non-zero amplitude without clipping: the peak changes and the spread grows.
    wav_processed = add_noise_to_wavfile(wav=wav,
                                         amplitude_factor=0.5,
                                         clip_to_original_range=False)
    self.assertEqual(len(wav), len(wav_processed))
    # assertNotEqual / assertEqual replace the deprecated assertNotEquals /
    # assertEquals aliases, which no longer exist in Python 3.12.
    self.assertNotEqual(np.abs(wav).max(), np.abs(wav_processed).max())
    self.assertLess(np.std(wav), np.std(wav_processed))

    # Non-zero amplitude with clipping: the peak matches the original.
    wav_processed = add_noise_to_wavfile(wav=wav,
                                         amplitude_factor=0.5,
                                         clip_to_original_range=True)
    self.assertEqual(len(wav), len(wav_processed))
    self.assertEqual(np.abs(wav).max(), np.abs(wav_processed).max())
def test_normalize_wavfile_formats(self):
    """Check that normalize_wavfile gives consistent results across differently scaled inputs.

    The same clip is rescaled to four value ranges and normalized with the
    second positional argument set to 0..3 (presumably a format selector --
    confirm against normalize_wavfile). Peak normalization and invalid (NaN)
    input are covered as well.
    """
    _, wav = read_wavfile(self.wav_filepath)

    # The same signal expressed on four different scales.
    wav_0 = wav / 32768              # scaled down by 2**15
    wav_1 = wav_0 * 2147483648       # scaled up by 2**31
    wav_2 = wav                      # as read from file
    wav_3 = 255 * (wav_0 + 1) / 2    # offset and scaled by 255

    wav_0_processed = normalize_wavfile(wav_0, 0)
    wav_1_processed = normalize_wavfile(wav_1, 1)
    wav_2_processed = normalize_wavfile(wav_2, 2)
    wav_3_processed = normalize_wavfile(wav_3, 3)
    wavs_peak_processed = [
        normalize_wavfile(scaled, normalize_to_peak=True)
        for scaled in [wav_0, wav_1, wav_2, wav_3]
    ]

    # All four scales must normalize to the same extremes.
    self.assertTrue(
        max(wav_0_processed) == max(wav_1_processed) ==
        max(wav_2_processed) == max(wav_3_processed))
    self.assertTrue(
        min(wav_0_processed) == min(wav_1_processed) ==
        min(wav_2_processed) == min(wav_3_processed))

    self.assertLessEqual(max(wav_0_processed), 1)
    self.assertAlmostEqual(max(wav_0_processed), 1, delta=0.75)
    self.assertGreaterEqual(min(wav_0_processed), -1)
    self.assertAlmostEqual(min(wav_0_processed), -1, delta=0.75)

    # Peak-normalized signals must stay within [-1, 1]; the lower bound is
    # checked against the minimum, not the maximum.
    for wav_processed in wavs_peak_processed:
        self.assertGreaterEqual(1, wav_processed.max())
        self.assertLessEqual(-1, wav_processed.min())

    # An all-zero clip stays all-zero.
    wav_processed = normalize_wavfile(np.zeros(16000), normalize_to_peak=True)
    self.assertEqual(0, np.abs(wav_processed).sum())

    # A single NaN sample is rejected. The fixture is built outside the
    # assertRaises block so only normalize_wavfile can satisfy it.
    wav_corrupted = np.random.rand(16000)
    wav_corrupted[4000] = np.nan
    with self.assertRaises(ValueError):
        normalize_wavfile(wav_corrupted, normalize_to_peak=True)

    # An all-NaN clip is rejected as well.
    with self.assertRaises(ValueError):
        normalize_wavfile(np.ones(16000) * np.nan, normalize_to_peak=True)
def test_normalize_wavfile_to_peaks(self):
    """Check that peak normalization bounds the signal to [-1, 1] and reaches magnitude 1."""
    _, raw = read_wavfile(self.wav_filepath)
    normalized = normalize_wavfile(raw, normalize_to_peak=True)

    lowest = min(normalized)
    highest = max(normalized)

    self.assertGreaterEqual(lowest, -1)
    self.assertLessEqual(highest, 1)
    # The largest magnitude, on either side of zero, must be exactly 1.
    self.assertEqual(max(highest, -lowest), 1)
def test_draw_random_subclip(self):
    """Check that a random subclip has the requested length and is not all zeros."""
    _, raw = read_wavfile(self.wav_filepath)
    subclip = draw_random_subclip(raw, 1000)
    self.assertEqual(1000, len(subclip))

    # The max of the boolean mask is True as soon as one sample is non-zero,
    # and True compares greater than 0.5.
    nonzero_mask = np.array(subclip) != 0
    self.assertLess(0.5, np.max(nonzero_mask))
def test_randomly_distort_wavfile(self):
    """Check that random distortion preserves the number of samples."""
    rate, raw = read_wavfile(self.wav_filepath)
    distorted = randomly_distort_wavfile(wav=raw, sr=rate)
    expected_length = len(raw)
    self.assertEqual(expected_length, len(distorted))