示例#1
0
def sprengel_binary_mask_from_wave_file(filepath):
    """ Plot the intermediate steps of the binary mask computation for a wave
    file and save them as an image.

    The saved figure has four stacked panels: the log amplitude spectrogram,
    the result of median clipping the normalized amplitude spectrogram, the
    clipped image after binary erosion, and the eroded image after binary
    dilation.

    # Arguments
        filepath : the wave file to visualize
    # Returns
        nothing, saves the figure as "<basename>_binary_mask.png" where
        <basename> is utils.get_basename_without_ext(filepath)
    """
    fs, x = utils.read_wave_file(filepath)
    Sxx = sp.wave_to_amplitude_spectrogram(x, fs)
    Sxx_log = sp.wave_to_log_amplitude_spectrogram(x, fs)

    # plot spectrogram
    fig = plt.figure(1)
    subplot_image(Sxx_log, 411, "Spectrogram")

    Sxx = pp.normalize(Sxx)
    binary_image = pp.median_clipping(Sxx, 3.0)

    # "+ 0" presumably turns the boolean image into integers for plotting
    subplot_image(binary_image + 0, 412, "Median Clipping")

    binary_image = morphology.binary_erosion(binary_image,
                                             selem=np.ones((4, 4)))

    subplot_image(binary_image + 0, 413, "Erosion")

    binary_image = morphology.binary_dilation(binary_image,
                                              selem=np.ones((4, 4)))

    subplot_image(binary_image + 0, 414, "Dilation")

    # Per-column indicator vector, dilated twice. It is not part of the saved
    # figure (its plot call is disabled below).
    mask = np.array([np.max(col) for col in binary_image.T])
    mask = morphology.binary_dilation(mask, np.ones(4))
    mask = morphology.binary_dilation(mask, np.ones(4))

    # plot_vector(mask, "Mask")

    fig.set_size_inches(10, 12)
    plt.tight_layout()
    fig.savefig(utils.get_basename_without_ext(filepath) + "_binary_mask.png",
                dpi=100)

    # Release figure 1 so that a later call (or another plotting function that
    # also uses figure 1) starts from an empty figure instead of drawing on
    # top of these panels.
    fig.clf()
    plt.close(fig)
示例#2
0
def load_wav_as_mfcc_delta(fname,
                           target_size=None,
                           noise_files=None,
                           augment_with_noise=False,
                           class_dir=None):
    """ Load a wave file as MFCCs stacked with three MFCC delta features.

    # Arguments
        fname : the wave file to load
        target_size : optional (height, width) tuple; when given, its first
            entry is used as the number of MFCC coefficients and every
            feature map is resized to this size. When omitted, librosa's
            default number of coefficients is used and nothing is resized.
        noise_files : noise files handed to da.noise_augmentation
        augment_with_noise : whether to apply da.noise_augmentation
        class_dir : when set, da.same_class_augmentation is applied with it
    # Returns
        an array with four channels along the last axis: the MFCCs followed
        by their first order deltas of width 3, 11 and 19
    """
    (fs, signal) = utils.read_wave_file(fname)

    if class_dir:
        signal = da.same_class_augmentation(signal, class_dir)

    if augment_with_noise:
        signal = da.noise_augmentation(signal, noise_files)

    # target_size defaults to None, so only index it when it was supplied.
    mfcc_kwargs = {"n_mfcc": target_size[0]} if target_size else {}
    # Keyword arguments work on old and new librosa; recent releases reject
    # the audio and sample rate when passed positionally.
    mfcc = librosa.feature.mfcc(y=signal, sr=fs, **mfcc_kwargs)

    channels = [mfcc]
    for width in (3, 11, 19):
        channels.append(librosa.feature.delta(mfcc, width=width, order=1))

    if target_size:
        # NOTE(review): scipy.misc.imresize only exists in old SciPy releases.
        channels = [scipy.misc.imresize(channel, target_size)
                    for channel in channels]

    # Give every feature map a trailing channel axis and stack them.
    channels = [channel.reshape(channel.shape[0], channel.shape[1], 1)
                for channel in channels]
    return np.concatenate(channels, axis=2)
示例#3
0
def noise_augmentation_from_dirs(noise_dir, class_dir):
    """ Visualize noise augmentation on a randomly chosen segment.

    Picks one "*.wav" file from class_dir, augments it with
    da.noise_augmentation and saves the spectrograms of the original and the
    augmented signal, one above the other, as
    "<basename>_noise_augmentation.png".

    # Arguments
        noise_dir : passed unchanged as second argument to
            da.noise_augmentation
        class_dir : the directory to pick the signal segment from
    # Returns
        nothing, simply saves the figure
    """
    wav_paths = glob.glob(os.path.join(class_dir, "*.wav"))
    chosen_path = np.random.choice(wav_paths, 1, replace=False)[0]
    (rate, original) = utils.read_wave_file(chosen_path)
    augmented = da.noise_augmentation(original, noise_dir)

    # (spectrogram, title) for every row of the figure, top to bottom
    panels = (
        (sp.wave_to_sample_spectrogram(original, rate), "Original Signal"),
        (sp.wave_to_sample_spectrogram(augmented, rate),
         "Noise Augmented signal"),
    )

    figure = plt.figure(1)
    colormap = plt.cm.get_cmap('jet')
    grid = gridspec.GridSpec(2, 1)
    for row, (spectrogram, title) in enumerate(panels):
        axes = figure.add_subplot(grid[row, 0])
        axes.pcolormesh(spectrogram, cmap=colormap)
        axes.set_title(title)

    grid.update(wspace=0.5, hspace=0.5)

    out_name = utils.get_basename_without_ext(chosen_path)
    figure.savefig(out_name + "_noise_augmentation.png")

    # release the figure again
    figure.clf()
    plt.close(figure)
示例#4
0
def signal_and_noise_spectrogram_from_wave_file(filepath):
    """ Visualize how a wave file is separated into signal and noise.

    Saves a figure with the spectrogram of the whole sound on the top row and
    the spectrograms of the signal and noise parts returned by
    pp.preprocess_wave side by side below it.

    # Arguments
        filepath : the wave file to visualize
    # Returns
        nothing, saves the figure as "<basename>_noise_signal.png"
    """
    (rate, sound) = utils.read_wave_file(filepath)
    sound_spectrogram = sp.wave_to_sample_spectrogram(sound, rate)
    signal_part, noise_part = pp.preprocess_wave(sound, rate)
    signal_spectrogram = sp.wave_to_sample_spectrogram(signal_part, rate)
    noise_spectrogram = sp.wave_to_sample_spectrogram(noise_part, rate)

    # (grid position, spectrogram, title); the first entry spans the top row
    panels = (
        ((0, slice(None)), sound_spectrogram, "Sound"),
        ((1, 0), signal_spectrogram, "Signal"),
        ((1, 1), noise_spectrogram, "Noise"),
    )

    figure = plt.figure(1)
    colormap = plt.cm.get_cmap('jet')
    grid = gridspec.GridSpec(2, 2)
    for position, spectrogram, title in panels:
        axes = figure.add_subplot(grid[position])
        axes.pcolormesh(spectrogram, cmap=colormap)
        axes.set_title(title)

    grid.update(wspace=0.5, hspace=0.5)

    out_name = utils.get_basename_without_ext(filepath)
    figure.savefig(out_name + "_noise_signal.png")

    # release the figure again
    figure.clf()
    plt.close(figure)
def preprocess_sound_file(filename, class_dir, noise_dir,
                          segment_size_seconds):
    """ Preprocess a sound file. Loads the sound file, separates it into a
    signal part and a noise part with preprocess_wave, splits each non-empty
    part into segments of segment_size_seconds seconds and saves the segments.

    # Arguments
        filename : the sound file to preprocess
        class_dir : the directory to save the extracted signal segments in
        noise_dir : the directory to save the extracted noise segments in
        segment_size_seconds : the size of each segment in seconds
    # Returns
        nothing, simply saves the preprocessed sound segments
    """
    samplerate, wave = utils.read_wave_file(filename)
    signal_wave, noise_wave = preprocess_wave(wave, samplerate)
    basename = utils.get_basename_without_ext(filename)

    # signal segments go to class_dir, noise segments to noise_dir
    destinations = ((signal_wave, class_dir), (noise_wave, noise_dir))
    for part, out_dir in destinations:
        if part.shape[0] > 0:
            segments = split_into_segments(part, samplerate,
                                           segment_size_seconds)
            save_segments_to_file(out_dir, segments, basename, samplerate)
def same_class_augmentation(wave, class_dir):
    """ Blend the wave with a randomly chosen "*.wav" segment from class_dir.

    The result is the convex combination (1 - alpha) * wave + alpha * segment
    where alpha is drawn with np.random.rand().
    """
    candidates = glob.glob(os.path.join(class_dir, "*.wav"))
    (_, other) = utils.read_wave_file(random.choice(candidates))
    weight = np.random.rand()
    return (1.0 - weight) * wave + weight * other
def compute_noise_augmented():
    """ Add three randomly chosen files from noise_files_small, each scaled by
    0.4, on top of the module-level wave x and return the sum.
    """
    damping = 0.4
    mixed = x
    # choose all noise files first, then load and add them one by one
    chosen = [random.choice(noise_files_small) for _ in range(3)]
    for noise_path in chosen:
        (_, noise) = utils.read_wave_file(noise_path)
        mixed = mixed + noise * damping
    return mixed
示例#8
0
def same_class_augmentation_from_dir(class_dir):
    """ Visualize same class augmentation on two randomly chosen segments.

    Picks two "*.wav" files from class_dir (independently, so the same file
    may be picked twice), blends them with a random weight alpha and saves
    the spectrograms of both signals and of the blend as
    "<basename>_same_class_augmentation.png", where <basename> comes from the
    first signal.

    # Arguments
        class_dir : the directory to pick the segments from
    # Returns
        nothing, simply saves the figure
    """
    wav_paths = glob.glob(os.path.join(class_dir, "*.wav"))
    first_path = np.random.choice(wav_paths, 1, replace=False)[0]
    (rate, first) = utils.read_wave_file(first_path)

    second_path = np.random.choice(wav_paths, 1, replace=False)[0]
    # the sample rate of the second file is the one used below
    (rate, second) = utils.read_wave_file(second_path)
    alpha = np.random.rand()
    blended = (1.0 - alpha) * first + alpha * second

    blend_title = "Augmented Signal (alpha=" + str(alpha) + ")"
    # (spectrogram, title) for every row of the figure, top to bottom
    panels = (
        (sp.wave_to_sample_spectrogram(first, rate), "Signal 1"),
        (sp.wave_to_sample_spectrogram(second, rate), "Signal 2"),
        (sp.wave_to_sample_spectrogram(blended, rate), blend_title),
    )

    figure = plt.figure(1)
    colormap = plt.cm.get_cmap('jet')
    grid = gridspec.GridSpec(3, 1)
    for row, (spectrogram, title) in enumerate(panels):
        axes = figure.add_subplot(grid[row, 0])
        axes.pcolormesh(spectrogram, cmap=colormap)
        axes.set_title(title)

    grid.update(wspace=0.5, hspace=0.5)

    out_name = utils.get_basename_without_ext(first_path)
    figure.savefig(out_name + "_same_class_augmentation.png")

    # release the figure again
    figure.clf()
    plt.close(figure)
def noise_augmentation(wave, noise_files, nb_noise_segments=3,
                       dampening_factor=0.4):
    """ Perform noise augmentation of the wave by loading randomly chosen
    noise segments and adding them, dampened, on top of the wave.

    # Arguments
        wave : the wave to augment
        noise_files : sequence of noise file paths to choose from; files are
            drawn with replacement, so the same file may be added twice
        nb_noise_segments : the number of noise segments to add (default 3)
        dampening_factor : the factor every noise segment is multiplied with
            before it is added (default 0.4)
    # Returns
        the augmented wave; the input wave itself when nb_noise_segments is 0
    """
    # Choose all noise files before loading any of them.
    aug_noise_files = [random.choice(noise_files)
                       for _ in range(nb_noise_segments)]

    for aug_noise_path in aug_noise_files:
        (_, aug_noise) = utils.read_wave_file(aug_noise_path)
        wave = wave + aug_noise * dampening_factor
    return wave
def predict(model, segment_names, directory):
    """ Predict the species of every sound segment.

    Every segment is loaded, converted with sp.wave_to_sample_spectrogram and
    resized to (256, 512) before the whole batch is handed to model.predict.

    # Arguments
        model : object whose predict(batch, batch_size, verbose) method
            returns one row of class scores per segment
        segment_names : the wave files to classify
        directory : passed to loader.build_class_index to build the lookup
            from predicted category to species
    # Returns
        a list with the looked up species of every segment
    """
    class_index = loader.build_class_index(directory)

    def to_input(segment_name):
        # one segment -> resized spectrogram
        rate, samples = utils.read_wave_file(segment_name)
        spectrogram = sp.wave_to_sample_spectrogram(samples, rate)
        return scipy.misc.imresize(spectrogram, (256, 512))

    batch = np.array([to_input(name) for name in segment_names])

    # append a single channel axis
    batch_shape = (batch.shape[0], batch.shape[1], batch.shape[2], 1)
    batch = batch.reshape(batch_shape)

    scores = model.predict(batch, batch_size=16, verbose=1)
    categories = [int(np.argmax(row)) for row in scores]
    return [class_index[category] for category in categories]
示例#11
0
def load_wav_as_tempogram(fname,
                          target_size=None,
                          noise_files=None,
                          augment_with_noise=False,
                          class_dir=None):
    """ Load a wave file as a single channel tempogram.

    # Arguments
        fname : the wave file to load
        target_size : when set, the tempogram is resized to this size
        noise_files : noise files handed to da.noise_augmentation
        augment_with_noise : whether to apply da.noise_augmentation
        class_dir : when set, da.same_class_augmentation is applied with it
    # Returns
        the tempogram with a trailing axis of length one
    """
    (rate, audio) = utils.read_wave_file(fname)

    # optional augmentations, same class first
    if class_dir:
        audio = da.same_class_augmentation(audio, class_dir)
    if augment_with_noise:
        audio = da.noise_augmentation(audio, noise_files)

    image = sp.wave_to_tempogram(audio, rate)
    if target_size:
        image = scipy.misc.imresize(image, target_size)

    rows, cols = image.shape[0], image.shape[1]
    return image.reshape((rows, cols, 1))
示例#12
0
def load_wav_as_mfcc(fname,
                     target_size=None,
                     noise_files=None,
                     augment_with_noise=False,
                     class_dir=None):
    """ Load a wave file as single channel MFCCs.

    # Arguments
        fname : the wave file to load
        target_size : optional (height, width) tuple; when given, its first
            entry is used as the number of MFCC coefficients and the MFCCs
            are resized to this size. When omitted, librosa's default number
            of coefficients is used and nothing is resized.
        noise_files : noise files handed to da.noise_augmentation
        augment_with_noise : whether to apply da.noise_augmentation
        class_dir : when set, da.same_class_augmentation is applied with it
    # Returns
        the MFCCs with a trailing axis of length one
    """
    (fs, signal) = utils.read_wave_file(fname)

    if class_dir:
        signal = da.same_class_augmentation(signal, class_dir)

    if augment_with_noise:
        signal = da.noise_augmentation(signal, noise_files)

    # target_size defaults to None, so only index it when it was supplied.
    mfcc_kwargs = {"n_mfcc": target_size[0]} if target_size else {}
    # Keyword arguments work on old and new librosa; recent releases reject
    # the audio and sample rate when passed positionally.
    mfcc = librosa.feature.mfcc(y=signal, sr=fs, **mfcc_kwargs)

    if target_size:
        # NOTE(review): scipy.misc.imresize only exists in old SciPy releases.
        mfcc = scipy.misc.imresize(mfcc, target_size)

    mfcc = mfcc.reshape(mfcc.shape[0], mfcc.shape[1], 1)

    return mfcc
示例#13
0
def _mfcc_sample(signal, fs, target_size):
    # MFCCs resized to target_size, with a trailing channel axis.
    sample = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=target_size[0])
    sample = scipy.misc.imresize(sample, target_size)
    return sample.reshape((sample.shape[0], sample.shape[1], 1))


def _mfcc_delta_sample(signal, fs, target_size):
    # MFCCs plus their deltas of width 3, 11 and 19 as four resized channels.
    mfcc = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=target_size[0])
    channels = [mfcc]
    for width in (3, 11, 19):
        channels.append(librosa.feature.delta(mfcc, width=width, order=1))
    channels = [scipy.misc.imresize(channel, target_size)
                for channel in channels]
    channels = [channel.reshape(channel.shape[0], channel.shape[1], 1)
                for channel in channels]
    return np.concatenate(channels, axis=2)


def _spectrogram_sample(signal, fs, target_size):
    # Sample spectrogram resized to target_size, with a trailing channel axis.
    sample = sp.wave_to_sample_spectrogram(signal, fs)
    sample = scipy.misc.imresize(sample, target_size)
    return sample.reshape((sample.shape[0], sample.shape[1], 1))


# Maps every supported input_data_mode to the function building its sample.
_SAMPLE_BUILDERS = {
    "mfcc": _mfcc_sample,
    "mfcc_delta": _mfcc_delta_sample,
    "spectrogram": _spectrogram_sample,
}


def load_segments(segments, target_size, input_data_mode):
    """ Load sound segments as model input samples.

    # Arguments
        segments : the wave files to load
        target_size : (height, width) every feature map is resized to; its
            first entry is also used as the number of MFCC coefficients
        input_data_mode : one of "mfcc", "mfcc_delta" or "spectrogram"
    # Returns
        an array with one sample per segment (empty when segments is empty)
    # Raises
        ValueError : if there is at least one segment and input_data_mode is
            not a supported mode. Previously this either failed with an
            unbound local variable or silently repeated the sample of the
            preceding segment.
    """
    print(segments, target_size, input_data_mode)
    build_sample = _SAMPLE_BUILDERS.get(input_data_mode)
    data = []
    for segment in segments:
        # Checked inside the loop, before any file is read, so that an empty
        # segment list still returns an empty array for any mode.
        if build_sample is None:
            raise ValueError(
                "unknown input_data_mode: %r (expected one of %s)" %
                (input_data_mode, ", ".join(sorted(_SAMPLE_BUILDERS))))
        (fs, signal) = utils.read_wave_file(segment)
        data.append(build_sample(signal, fs, target_size))

    return np.asarray(data)
def read_random_noise_file():
    """ Read a randomly chosen file from noise_files and double its samples.
    The result is discarded; nothing is returned.
    """
    noise_path = random.choice(noise_files)
    (_, samples) = utils.read_wave_file(noise_path)
    samples = samples * 2
def read_wave_file():
    """ Read the module-level sample file `filename` with
    utils.read_wave_file. The result is discarded; nothing is returned.
    """
    utils.read_wave_file(filename)
import glob
import random

from bird import preprocessing as pp
from bird import signal_processing as sp
from bird import data_augmentation as da
import bird.generators.sound as gs
from bird import utils

# Module-level fixtures shared by the functions in this file. Everything
# below runs at import time and reads from hard-coded absolute paths.
filename = "/disk/martinsson-spring17/datasets/birdClef2016Subset/train/affinis/LIFECLEF2015_BIRDAMAZON_XC_WAV_RN14132_seg_0.wav"
# sample rate and samples of the sample segment
(fs, x) = utils.read_wave_file(filename)
Sxx = sp.wave_to_sample_spectrogram(x, fs)
n_mask = pp.compute_signal_mask(Sxx)
# presumably stretches the mask to one entry per wave sample -- TODO confirm
n_mask_scaled = pp.reshape_binary_mask(n_mask, x.shape[0])
Nxx = pp.normalize(Sxx)

target_size = (256, 512)
# noise file lists read by the functions in this file
noise_files = glob.glob(
    "/disk/martinsson-spring17/birdClef2016Whole/noise/*.wav")
noise_files_small = glob.glob("/home/martinsson-spring17/data/noise/*.wav")
class_dir = "/disk/martinsson-spring17/datasets/birdClef2016Subset/train/affinis"


def compute_tempogram():
    """ Compute the tempogram of the module-level sample wave (x, fs). The
    result is discarded; nothing is returned.
    """
    sp.wave_to_tempogram(x, fs)


def compute_spectrogram():
    """ Compute the sample spectrogram of the module-level sample wave
    (x, fs). The result is discarded; nothing is returned.
    """
    sp.wave_to_sample_spectrogram(x, fs)