def sprengel_binary_mask_from_wave_file(filepath):
    """ Plot the intermediate images of the binary mask computation.

    Reads the wave file, then draws four stacked panels: the log amplitude
    spectrogram, the median clipped (normalized) amplitude spectrogram, the
    result after binary erosion and the result after binary dilation.

    # Arguments
        filepath : path of the wave file to visualize
    # Returns
        nothing, saves the figure as "<basename>_binary_mask.png"
    """
    fs, x = utils.read_wave_file(filepath)
    Sxx = sp.wave_to_amplitude_spectrogram(x, fs)
    Sxx_log = sp.wave_to_log_amplitude_spectrogram(x, fs)

    fig = plt.figure(1)
    subplot_image(Sxx_log, 411, "Spectrogram")

    Sxx = pp.normalize(Sxx)
    binary_image = pp.median_clipping(Sxx, 3.0)
    # "+ 0" turns the boolean image into a numeric one before plotting
    subplot_image(binary_image + 0, 412, "Median Clipping")

    binary_image = morphology.binary_erosion(binary_image,
                                             selem=np.ones((4, 4)))
    subplot_image(binary_image + 0, 413, "Erosion")

    binary_image = morphology.binary_dilation(binary_image,
                                              selem=np.ones((4, 4)))
    subplot_image(binary_image + 0, 414, "Dilation")

    fig.set_size_inches(10, 12)
    plt.tight_layout()
    fig.savefig(utils.get_basename_without_ext(filepath) + "_binary_mask.png",
                dpi=100)

    # Release figure 1 so that repeated calls do not draw on top of stale
    # axes or keep the figure alive (same cleanup as the other plot helpers).
    fig.clf()
    plt.close(fig)
def load_wav_as_mfcc_delta(fname, target_size=None, noise_files=None,
                           augment_with_noise=False, class_dir=None):
    """ Load a wave file as a four channel MFCC / delta-MFCC sample.

    # Arguments
        fname : path of the wave file to load
        target_size : optional (height, width); when given, height is used
            as the number of MFCC coefficients and every channel is resized
            to this size
        noise_files : noise file paths handed to da.noise_augmentation
        augment_with_noise : if true, apply da.noise_augmentation
        class_dir : if set, apply da.same_class_augmentation with this
            directory
    # Returns
        array of shape (height, width, 4): the MFCC followed by its first
        order deltas of width 3, 11 and 19
    """
    (fs, signal) = utils.read_wave_file(fname)

    if class_dir:
        signal = da.same_class_augmentation(signal, class_dir)

    if augment_with_noise:
        signal = da.noise_augmentation(signal, noise_files)

    # target_size defaults to None; only override librosa's default number of
    # coefficients when a size was actually given (indexing None would raise).
    mfcc_kwargs = {"n_mfcc": target_size[0]} if target_size else {}
    # Keyword arguments: newer librosa releases no longer accept y/sr
    # positionally.
    mfcc = librosa.feature.mfcc(y=signal, sr=fs, **mfcc_kwargs)

    # All deltas are computed from the un-resized MFCC.
    channels = [mfcc] + [librosa.feature.delta(mfcc, width=width, order=1)
                         for width in (3, 11, 19)]

    if target_size:
        # NOTE(review): scipy.misc.imresize is not available in recent SciPy
        # releases; left as is because a replacement needs a new dependency.
        channels = [scipy.misc.imresize(channel, target_size)
                    for channel in channels]

    # Stack the equally shaped 2-D channels along a new last axis.
    return np.stack(channels, axis=2)
def noise_augmentation_from_dirs(noise_dir, class_dir):
    """ Plot a random signal from class_dir before and after noise
    augmentation.

    # Arguments
        noise_dir : directory containing the noise "*.wav" files
        class_dir : directory containing the signal "*.wav" files
    # Returns
        nothing, saves the figure as "<basename>_noise_augmentation.png"
    """
    sig_paths = glob.glob(os.path.join(class_dir, "*.wav"))
    sig_path = np.random.choice(sig_paths, 1, replace=False)[0]
    (fs, sig) = utils.read_wave_file(sig_path)

    # noise_augmentation draws random entries from a collection of file
    # paths; handing it the directory string itself would make it draw single
    # characters of the path, so list the noise files first.
    noise_files = glob.glob(os.path.join(noise_dir, "*.wav"))
    aug_sig = da.noise_augmentation(sig, noise_files)

    spectrogram_sig = sp.wave_to_sample_spectrogram(sig, fs)
    spectrogram_aug_sig = sp.wave_to_sample_spectrogram(aug_sig, fs)

    fig = plt.figure(1)
    # plt.get_cmap is available across Matplotlib versions, plt.cm.get_cmap
    # is not.
    cmap = plt.get_cmap('jet')
    gs = gridspec.GridSpec(2, 1)

    panels = (
        ("Original Signal", spectrogram_sig),
        ("Noise Augmented signal", spectrogram_aug_sig),
    )
    for row, (title, spectrogram) in enumerate(panels):
        ax = fig.add_subplot(gs[row, 0])
        ax.pcolormesh(spectrogram, cmap=cmap)
        ax.set_title(title)

    gs.update(wspace=0.5, hspace=0.5)

    basename = utils.get_basename_without_ext(sig_path)
    fig.savefig(basename + "_noise_augmentation.png")

    fig.clf()
    plt.close(fig)
def signal_and_noise_spectrogram_from_wave_file(filepath):
    """ Plot the spectrogram of a wave file together with the spectrograms
    of its extracted signal and noise parts.

    # Arguments
        filepath : path of the wave file to visualize
    # Returns
        nothing, saves the figure as "<basename>_noise_signal.png"
    """
    (fs, wave) = utils.read_wave_file(filepath)
    spectrogram = sp.wave_to_sample_spectrogram(wave, fs)

    signal_wave, noise_wave = pp.preprocess_wave(wave, fs)
    spectrogram_signal = sp.wave_to_sample_spectrogram(signal_wave, fs)
    spectrogram_noise = sp.wave_to_sample_spectrogram(noise_wave, fs)

    fig = plt.figure(1)
    # plt.get_cmap is available across Matplotlib versions, plt.cm.get_cmap
    # is not.
    cmap = plt.get_cmap('jet')
    gs = gridspec.GridSpec(2, 2)

    # The whole sound spans the top row, signal and noise share the bottom.
    panels = (
        (gs[0, :], "Sound", spectrogram),
        (gs[1, 0], "Signal", spectrogram_signal),
        (gs[1, 1], "Noise", spectrogram_noise),
    )
    for cell, title, image in panels:
        ax = fig.add_subplot(cell)
        ax.pcolormesh(image, cmap=cmap)
        ax.set_title(title)

    gs.update(wspace=0.5, hspace=0.5)

    basename = utils.get_basename_without_ext(filepath)
    fig.savefig(basename + "_noise_signal.png")

    fig.clf()
    plt.close(fig)
def preprocess_sound_file(filename, class_dir, noise_dir, segment_size_seconds):
    """ Preprocess a sound file.

    Reads the sound file, separates it into a signal part and a noise part,
    and splits each non-empty part into segments of segment_size_seconds
    which are then written to disk.

    # Arguments
        filename : the sound file to preprocess
        class_dir : the directory to save the extracted signal segments in
        noise_dir : the directory to save the extracted noise segments in
        segment_size_seconds : the size of each segment in seconds
    # Returns
        nothing, simply saves the preprocessed sound segments
    """
    samplerate, wave = utils.read_wave_file(filename)
    signal_wave, noise_wave = preprocess_wave(wave, samplerate)
    basename = utils.get_basename_without_ext(filename)

    # Signal first, then noise; an empty part produces no files.
    for target_dir, part in ((class_dir, signal_wave), (noise_dir, noise_wave)):
        if part.shape[0] <= 0:
            continue
        segments = split_into_segments(part, samplerate, segment_size_seconds)
        save_segments_to_file(target_dir, segments, basename, samplerate)
def same_class_augmentation(wave, class_dir):
    """ Blend the wave with a randomly chosen "*.wav" segment from class_dir.

    A weight alpha is drawn uniformly from [0, 1) and the result is
    (1 - alpha) * wave + alpha * segment.

    # Arguments
        wave : the wave to augment
        class_dir : directory holding the candidate "*.wav" segments
    # Returns
        the blended wave
    """
    candidates = glob.glob(os.path.join(class_dir, "*.wav"))
    chosen_path = random.choice(candidates)
    _, other_wave = utils.read_wave_file(chosen_path)

    alpha = np.random.rand()
    keep_weight = 1.0 - alpha
    return keep_weight * wave + alpha * other_wave
def compute_noise_augmented():
    """ Add three randomly drawn noise files from noise_files_small on top
    of the module-level wave x, each scaled by 0.4, and return the sum.
    """
    segment_count = 3
    damping = 0.4

    # Draw all paths first (with replacement), then read and mix them.
    chosen_paths = [random.choice(noise_files_small)
                    for _ in range(segment_count)]

    mixed = x
    for noise_path in chosen_paths:
        _, noise = utils.read_wave_file(noise_path)
        # A new array per step: the module-level x must not be modified.
        mixed = mixed + noise * damping
    return mixed
def same_class_augmentation_from_dir(class_dir):
    """ Plot two random signals from class_dir and their weighted
    combination.

    Both signals are drawn independently from the "*.wav" files in class_dir
    and mixed as (1 - alpha) * signal_1 + alpha * signal_2 with alpha drawn
    uniformly from [0, 1).

    # Arguments
        class_dir : directory containing the signal "*.wav" files
    # Returns
        nothing, saves the figure as "<basename>_same_class_augmentation.png"
        where basename is taken from the first signal
    """
    sig_paths = glob.glob(os.path.join(class_dir, "*.wav"))
    sig_path = np.random.choice(sig_paths, 1, replace=False)[0]
    (fs, sig) = utils.read_wave_file(sig_path)
    aug_sig_path = np.random.choice(sig_paths, 1, replace=False)[0]
    # fs is rebound here, so the spectrograms below use the sample rate of
    # the second file. NOTE(review): presumably all files in a class
    # directory share one sample rate - confirm.
    (fs, aug_sig) = utils.read_wave_file(aug_sig_path)

    alpha = np.random.rand()
    combined_sig = (1.0 - alpha) * sig + alpha * aug_sig

    spectrogram_sig = sp.wave_to_sample_spectrogram(sig, fs)
    spectrogram_aug_sig = sp.wave_to_sample_spectrogram(aug_sig, fs)
    spectrogram_combined_sig = sp.wave_to_sample_spectrogram(combined_sig, fs)

    fig = plt.figure(1)
    # plt.get_cmap is available across Matplotlib versions, plt.cm.get_cmap
    # is not.
    cmap = plt.get_cmap('jet')
    gs = gridspec.GridSpec(3, 1)

    panels = (
        ("Signal 1", spectrogram_sig),
        ("Signal 2", spectrogram_aug_sig),
        ("Augmented Signal (alpha=" + str(alpha) + ")",
         spectrogram_combined_sig),
    )
    for row, (title, spectrogram) in enumerate(panels):
        ax = fig.add_subplot(gs[row, 0])
        ax.pcolormesh(spectrogram, cmap=cmap)
        ax.set_title(title)

    gs.update(wspace=0.5, hspace=0.5)

    basename = utils.get_basename_without_ext(sig_path)
    fig.savefig(basename + "_same_class_augmentation.png")

    fig.clf()
    plt.close(fig)
def noise_augmentation(wave, noise_files):
    """ Add three randomly drawn noise segments on top of the wave.

    The segments are drawn with replacement from noise_files and each one is
    scaled by a dampening factor of 0.4 before it is added.

    # Arguments
        wave : the wave to augment
        noise_files : paths of the candidate noise wave files
    # Returns
        the wave with the dampened noise added
    """
    segment_count = 3
    damping = 0.4

    # Draw all paths first, then read and mix them in that order.
    chosen_paths = [random.choice(noise_files) for _ in range(segment_count)]

    mixed = wave
    for noise_path in chosen_paths:
        _, noise = utils.read_wave_file(noise_path)
        # A new array per step: the caller's wave must not be modified.
        mixed = mixed + noise * damping
    return mixed
def predict(model, segment_names, directory):
    """ Predict a class for every sound segment.

    # Arguments
        model : model whose predict method is called on the batch
        segment_names : paths of the wave segments to classify
        directory : directory passed to loader.build_class_index
    # Returns
        list with one class_index entry per segment, selected by the argmax
        of that segment's predicted probabilities
    """
    class_index = loader.build_class_index(directory)

    def as_network_input(path):
        # wave -> spectrogram -> image resized to 256 x 512
        sample_rate, samples = utils.read_wave_file(path)
        spectrogram = sp.wave_to_sample_spectrogram(samples, sample_rate)
        return scipy.misc.imresize(spectrogram, (256, 512))

    inputs = np.array([as_network_input(name) for name in segment_names])
    # append a trailing channel axis of size one
    inputs = inputs.reshape(inputs.shape[0], inputs.shape[1],
                            inputs.shape[2], 1)

    probabilities = model.predict(inputs, batch_size=16, verbose=1)
    return [class_index[int(np.argmax(row))] for row in probabilities]
def load_wav_as_tempogram(fname, target_size=None, noise_files=None,
                          augment_with_noise=False, class_dir=None):
    """ Load a wave file as a single channel tempogram sample.

    # Arguments
        fname : path of the wave file to load
        target_size : optional size the tempogram is resized to
        noise_files : noise file paths handed to da.noise_augmentation
        augment_with_noise : if true, apply da.noise_augmentation
        class_dir : if set, apply da.same_class_augmentation with this
            directory
    # Returns
        the tempogram with a trailing channel axis of size one
    """
    sample_rate, wave = utils.read_wave_file(fname)

    # same class augmentation comes before noise augmentation
    if class_dir:
        wave = da.same_class_augmentation(wave, class_dir)
    if augment_with_noise:
        wave = da.noise_augmentation(wave, noise_files)

    image = sp.wave_to_tempogram(wave, sample_rate)
    if target_size:
        image = scipy.misc.imresize(image, target_size)

    rows, cols = image.shape[0], image.shape[1]
    return image.reshape((rows, cols, 1))
def load_wav_as_mfcc(fname, target_size=None, noise_files=None,
                     augment_with_noise=False, class_dir=None):
    """ Load a wave file as a single channel MFCC sample.

    # Arguments
        fname : path of the wave file to load
        target_size : optional (height, width); when given, height is used
            as the number of MFCC coefficients and the MFCC is resized to
            this size
        noise_files : noise file paths handed to da.noise_augmentation
        augment_with_noise : if true, apply da.noise_augmentation
        class_dir : if set, apply da.same_class_augmentation with this
            directory
    # Returns
        the MFCC with a trailing channel axis of size one
    """
    (fs, signal) = utils.read_wave_file(fname)

    if class_dir:
        signal = da.same_class_augmentation(signal, class_dir)

    if augment_with_noise:
        signal = da.noise_augmentation(signal, noise_files)

    # target_size defaults to None; only override librosa's default number of
    # coefficients when a size was actually given (indexing None would raise).
    mfcc_kwargs = {"n_mfcc": target_size[0]} if target_size else {}
    # Keyword arguments: newer librosa releases no longer accept y/sr
    # positionally.
    mfcc = librosa.feature.mfcc(y=signal, sr=fs, **mfcc_kwargs)

    if target_size:
        # NOTE(review): scipy.misc.imresize is not available in recent SciPy
        # releases; left as is because a replacement needs a new dependency.
        mfcc = scipy.misc.imresize(mfcc, target_size)

    return mfcc.reshape(mfcc.shape[0], mfcc.shape[1], 1)
def _segment_to_sample(signal, fs, target_size, input_data_mode):
    """ Convert one wave into a (height, width, channels) sample. """
    # NOTE(review): scipy.misc.imresize is not available in recent SciPy
    # releases; left as is because a replacement needs a new dependency.
    if input_data_mode == "mfcc":
        mfcc = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=target_size[0])
        mfcc = scipy.misc.imresize(mfcc, target_size)
        return mfcc.reshape((mfcc.shape[0], mfcc.shape[1], 1))

    if input_data_mode == "mfcc_delta":
        mfcc = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=target_size[0])
        # All deltas are computed from the un-resized MFCC.
        channels = [mfcc] + [librosa.feature.delta(mfcc, width=width, order=1)
                             for width in (3, 11, 19)]
        channels = [scipy.misc.imresize(channel, target_size)
                    for channel in channels]
        # Stack the equally shaped 2-D channels along a new last axis.
        return np.stack(channels, axis=2)

    if input_data_mode == "spectrogram":
        spectrogram = sp.wave_to_sample_spectrogram(signal, fs)
        spectrogram = scipy.misc.imresize(spectrogram, target_size)
        return spectrogram.reshape(
            (spectrogram.shape[0], spectrogram.shape[1], 1))

    # Without this an unknown mode would leave the sample undefined, or reuse
    # the sample of the previous segment.
    raise ValueError("unknown input_data_mode: %r" % (input_data_mode,))


def load_segments(segments, target_size, input_data_mode):
    """ Load wave segments as an array of samples.

    # Arguments
        segments : paths of the wave segments to load
        target_size : (height, width) every sample is resized to; height is
            also the number of MFCC coefficients in the mfcc modes
        input_data_mode : one of "mfcc", "mfcc_delta" or "spectrogram"
    # Returns
        array with one sample per segment
    # Raises
        ValueError : if a segment is processed with an unknown
            input_data_mode
    """
    # diagnostic output, kept from the existing behavior
    print(segments, target_size, input_data_mode)

    data = []
    for segment in segments:
        (fs, signal) = utils.read_wave_file(segment)
        data.append(_segment_to_sample(signal, fs, target_size,
                                       input_data_mode))
    return np.asarray(data)
def read_random_noise_file():
    """ Read a randomly chosen file from noise_files and double its samples.

    Nothing is returned; the doubled samples are discarded.
    """
    noise_path = random.choice(noise_files)
    _, samples = utils.read_wave_file(noise_path)
    samples = samples * 2
def read_wave_file():
    """ Read the wave file named by `filename` with utils.read_wave_file.

    `filename` is not a parameter; it is looked up outside this function.
    The value returned by utils.read_wave_file is discarded.
    """
    utils.read_wave_file(filename)
import glob
import random
from bird import preprocessing as pp
from bird import signal_processing as sp
from bird import data_augmentation as da
import bird.generators.sound as gs
from bird import utils

# Wave file that every module-level value below is derived from.
filename = "/disk/martinsson-spring17/datasets/birdClef2016Subset/train/affinis/LIFECLEF2015_BIRDAMAZON_XC_WAV_RN14132_seg_0.wav"

# Everything below runs once at import time: the wave is read and its
# spectrogram, signal mask, rescaled mask and normalized spectrogram are
# computed up front.
# NOTE(review): presumably shared fixtures for timing the functions in this
# file - confirm.
(fs, x) = utils.read_wave_file(filename)
Sxx = sp.wave_to_sample_spectrogram(x, fs)
n_mask = pp.compute_signal_mask(Sxx)
# the mask is reshaped using the number of samples in the wave
n_mask_scaled = pp.reshape_binary_mask(n_mask, x.shape[0])
Nxx = pp.normalize(Sxx)
target_size = (256, 512)

# Noise file lists and class directory (absolute, machine specific paths).
noise_files = glob.glob(
    "/disk/martinsson-spring17/birdClef2016Whole/noise/*.wav")
noise_files_small = glob.glob("/home/martinsson-spring17/data/noise/*.wav")
class_dir = "/disk/martinsson-spring17/datasets/birdClef2016Subset/train/affinis"


def compute_tempogram():
    """ Compute the tempogram of the module-level wave; the result is
    discarded. """
    sp.wave_to_tempogram(x, fs)


def compute_spectrogram():
    """ Compute the sample spectrogram of the module-level wave; the result
    is discarded. """
    sp.wave_to_sample_spectrogram(x, fs)