Example #1
from maad import sound, rois, features
from maad.util import power2dB, plot2D, format_features, read_audacity_annot

#%%
# First, load an audio file and compute the power spectrogram.
s, fs = sound.load('../data/cold_forest_daylight.wav')

t0 = 0
t1 = 20
f0 = 100
f1 = 10000
dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s,
                                           fs,
                                           nperseg=1024,
                                           noverlap=1024 // 2,
                                           fcrop=(f0, f1),
                                           tcrop=(t0, t1))

# Convert the power spectrogram into dB and add dB_max, the maximum decibel
# range of a 16-bit recording, then display the result
Sxx_db = power2dB(Sxx_power) + dB_max
plot2D(Sxx_db, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})

#%%
# Then, relevant acoustic events are extracted directly from the power
# spectrogram based on a double thresholding technique. The result is a binary
# image called a mask. The double thresholding technique is more sophisticated
# than basic thresholding based on a single value. First, a threshold selects
# pixels with high values (i.e. high acoustic energy); they should belong to an
# acoustic event and are called seeds. From these seeds, we aggregate connected
# pixels whose value is higher than a second, lower threshold.
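#%%
# A minimal sketch of the masking step described above (the relative threshold
# values bin_std and bin_per are illustrative, not tuned for this recording).
im_mask = rois.create_mask(Sxx_db, mode_bin='relative', bin_std=8, bin_per=0.5)
# The connected pixels of the mask could then be grouped into ROIs with
# rois.select_rois.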
Example #2

"""
In an audio signal, regions of interest are usually regions with a high
density of energy. The function find_rois_cwt allows finding regions of
interest in the signal given very simple and intuitive parameters: temporal
length and frequency limits. This segmentation can be seen as a coarse
detection process, the starting point of more advanced classification methods.

The following sound example has two main soundtypes in the foreground:

- An accelerating trill between 4.5 and 8 kHz lasting approximately 2 seconds
- A fast descending chirp between 8 and 12 kHz lasting approximately 0.1 seconds
"""

#%% Load an audio file and compute the spectrogram for visualization.

from maad import sound
from maad.rois import find_rois_cwt
from maad.util import power2dB, plot2D

s, fs = sound.load('../../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=100) + 100
plot2D(Sxx_db, **{'extent': ext})

#%%
# Detect the accelerating trill
# -----------------------------
# The accelerating trill is the song of a small neotropical bird,
# *Cranioleuca erythrops*. This song can be detected on the recording using
# the function find_rois_cwt, setting the frequency limits flims=(4500, 8000)
# and the temporal length of the signal tlen=2.

_ = find_rois_cwt(s,
                  fs,
                  flims=(4500, 8000),
                  tlen=2,
                  th=0,
                  display=True,
                  figsize=(13, 6))
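#%%
# Detect the fast descending chirp
# --------------------------------
# The same function can be applied to the second soundtype. The frequency
# limits and temporal length below simply follow the description given above
# (8-12 kHz, ~0.1 s); they are indicative values, not tuned parameters.

_ = find_rois_cwt(s,
                  fs,
                  flims=(8000, 12000),
                  tlen=0.1,
                  th=0,
                  display=True,
                  figsize=(13, 6))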
"""
from maad.sound import load, spectrogram
from maad.features import shape_features, plot_shape, centroid_features, overlay_centroid
from maad.util import read_audacity_annot, linear_scale, format_features, get_unimode, running_mean
from maad.rois import overlay_rois, create_mask, select_rois, find_rois_cwt, remove_background, median_equalizer
from skimage import morphology
import numpy as np
import pandas as pd

###=============== load audio =================
s, fs = load('./data/spinetail.wav')
rois = read_audacity_annot(
    './data/spinetail.txt')  ## annotations using Audacity

###=============== compute spectrogram =================
Sxx, tn, fn, ext = spectrogram(s, fs)
Sxx = 10 * np.log10(Sxx)

rois = format_features(rois, tn, fn)

###=============== from Audacity =================

### with all labels
ax, fig = overlay_rois(Sxx, ext, rois, vmin=-120, vmax=20)

# Compute and visualize features
shape, params = shape_features(Sxx, resolution='low', rois=rois)
plot_shape(shape.mean(), params)

# Compute and visualize centroids
centroid = centroid_features(Sxx, rois)
Example #4
        print('Processing date: ', date)
        # concat audio into array
        s_sum = list()
        for index, row in flist_day.iterrows():
            s = s_dict[row.date]['s']
            s_sum.append(s)

        # crossfade and high pass filtering
        s_sum = crossfade_list(s_sum, fs)
        s_sum = butter_filter(s_sum, cutoff=200, fs=fs, order=2, ftype='high')

        # compute spectrogram
        im, dt, df, ext = sound.spectrogram(s_sum,
                                            fs,
                                            nperseg=opt_spec['wl'],
                                            cmap='viridis',
                                            overlap=opt_spec['ovlp'],
                                            fcrop=opt_spec['fcrop'],
                                            rescale=True,
                                            db_range=opt_spec['db_range'])
        # Apply gaussian smoothing
        im = gaussian(im, sigma=0.5, mode='reflect')

        # Normalize spectrogram according to sensor model
        vmin, vmax = 0.4, 0.8  # Audiomoth
        im[im < vmin] = vmin
        im[im > vmax] = vmax
        im = (im - im.min()) / (im.max() - im.min())
        # save to file
        im = np.flip(im, axis=0)
        key = fname_open[0:-4] + '_' + date
        io.imsave(path_save + key + fmt, im)
Example #5
                                                    fs,
                                                    gain=G,
                                                    sensibility=S,
                                                    dB_threshold=3,
                                                    rejectDuration=0.01,
                                                    verbose=False,
                                                    display=False)
    """ =======================================================================
                     Computation in the frequency domain 
    ========================================================================"""

    # Compute the Power Spectral Density (PSD) spectrogram: Sxx_power
    Sxx_power, tn, fn, ext = sound.spectrogram(wave,
                                               fs,
                                               window='hann',
                                               nperseg=1024,
                                               noverlap=1024 // 2,
                                               verbose=False,
                                               display=False,
                                               savefig=None)

    # compute all the spectral indices and store them into a DataFrame
    # flim_low, flim_mid, flim_hi correspond to the frequency limits in Hz
    # that are required to compute some indices (e.g. NDSI)
    # if R_compatible is set to 'soundecology', then the output is similar to
    # the soundecology R package.
    # mask_param1 and mask_param2 are two parameters to find the regions of
    # interest (ROIs). These parameters need to be adapted to the dataset in
    # order to select ROIs.
    df_spec_ind, df_spec_ind_per_bin = features.all_spectral_alpha_indices(
        Sxx_power,
        tn,
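    # A hedged illustration of a complete call to all_spectral_alpha_indices.
    # The frequency limits and mask parameters below are illustrative values
    # that must be adapted to the dataset.
    df_spec_ind, df_spec_ind_per_bin = features.all_spectral_alpha_indices(
        Sxx_power,
        tn,
        fn,
        flim_low=[0, 1500],       # low frequency band in Hz (e.g. for NDSI)
        flim_mid=[1500, 8000],    # mid frequency band in Hz
        flim_hi=[8000, 20000],    # high frequency band in Hz
        R_compatible='soundecology',
        mask_param1=6,
        mask_param2=0.5,
        verbose=False,
        display=False)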
Example #6

from maad import sound, spl, util

# The recording was made with a SM4 audio recorder from Wildlife Acoustics.
# The sensitivity of the internal microphone is -35dBV and the maximal
# voltage converted by the analog to digital converter (ADC) is 2Vpp
# (peak to peak). The gain used for the recording is a combination of the
# internal pre-amplifier of the SM4, which is 26dB, and the adjustable gain,
# which was 16dB. So the total gain applied to the signal is 42dB.

# We load the sound
w, fs = sound.load('../../data/spinetail.wav')
# We convert the sound into sound pressure (Pa)
p0 = spl.wav2pressure(wave=w, gain=42, Vadc=2, sensitivity=-35)
# We select part of the sound with the spinetail signal
p0_sig = p0[int(5.68 * fs):int(7.48 * fs)]
# We select part of the sound with background
p0_noise = p0[int(8.32 * fs):int(10.12 * fs)]
# We convert both signals into spectrograms
Sxx_power, tn, fn, ext = sound.spectrogram(p0_sig, fs)
Sxx_power_noise, tn, fn, ext = sound.spectrogram(p0_noise, fs)
# We convert both spectrograms into dB. We choose a dB range of 96 dB, which
# is the maximal range for a 16-bit signal.
Sxx_dB = util.power2dB(Sxx_power, db_range=96) + 96
Sxx_dB_noise = util.power2dB(Sxx_power_noise, db_range=96) + 96

#%%
# Before simulating the attenuation of the acoustic signature depending on
# the distance, we need to evaluate the distance at which the signal of the
# spinetail was recorded.
# First, we estimate the sound level L of the spinetail song in the recording
# by selecting the sound between 4900 and 7500 Hz.
p0_sig_4900_7500 = sound.select_bandwidth(p0_sig,
                                          fs,
                                          fcut=[4900, 7300],
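#%%
# A hedged sketch of the remaining level estimation: the filter order and
# type passed to select_bandwidth, and the use of spl.pressure2leq to get an
# equivalent continuous sound level, are assumptions for illustration.
p0_sig_band = sound.select_bandwidth(p0_sig, fs, fcut=[4900, 7300],
                                     forder=10, ftype='bandpass')
# Equivalent continuous sound level (Leq) of the band-limited spinetail song
L_sig = spl.pressure2leq(p0_sig_band, fs)
print('Leq of the spinetail song (dB SPL):', L_sig)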
Example #7
from maad.util import power2dB, plot2d
from maad.sound import (load, spectrogram, remove_background,
                        remove_background_along_axis, sharpness)
import numpy as np

from timeit import default_timer as timer

import matplotlib.pyplot as plt

#%%
# Load and plot the spectrogram of the original audio file
# --------------------------------------------------------
# First, we load the audio file and take its spectrogram.
# The linear spectrogram is then transformed into dB. The dB range is 96 dB,
# which is the maximum dB range for a 16-bit audio recording. We add 96 dB
# so that the spectrogram contains only positive values.
s, fs = load('../../data/tropical_forest_morning.wav')
Sxx, tn, fn, ext = spectrogram(s, fs, fcrop=[0,20000], tcrop=[0,60])
Sxx_dB = power2dB(Sxx, db_range=96) + 96
plot2d(Sxx_dB, extent=ext, title='original',
       vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40)

print ("Original sharpness : %2.3f" % sharpness(Sxx_dB))

#%%
# Test different methods to remove stationary background noise
# ------------------------------------------------------------
# Test the function "remove_background"
start = timer()
X1, noise_profile1, _ = remove_background(Sxx_dB)
elapsed_time = timer() - start
print("---- test remove_background -----")
print("duration %2.3f s" % elapsed_time)
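#%%
# Test the function "remove_background_along_axis" (imported above). The
# mode 'median' is one possible choice; it is used here for illustration.
start = timer()
X2, noise_profile2 = remove_background_along_axis(Sxx_dB, mode='median')
elapsed_time = timer() - start
print("---- test remove_background_along_axis -----")
print("duration %2.3f s" % elapsed_time)
print("sharpness : %2.3f" % sharpness(X2))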
Example #8
import matplotlib.pyplot as plt
from maad import sound, util

s, fs = sound.load('../../data/spinetail.wav')
util.plot_wave(s, fs)

#%%
# It can be noticed that in this audio there are four consecutive songs of the
# spinetail *Cranioleuca erythrops*, each song lasting approximately two seconds.
# Let's trim the signal to zoom in on the details of the song.

s_trim = sound.trim(s, fs, 5, 8)

#%%
# Once trimmed, let's compute the envelope of the signal, the Fourier and short-time Fourier transforms.
env = sound.envelope(s_trim, mode='fast', Nt=128)
pxx, fidx = sound.spectrum(s, fs, nperseg=1024, method='welch')
Sxx, tn, fn, ext = sound.spectrogram(s_trim,
                                     fs,
                                     window='hann',
                                     nperseg=1024,
                                     noverlap=512)

#%%
# Finally, we can visualize the signal characteristics in the temporal and
# spectral domains.

fig, ax = plt.subplots(4, 1, figsize=(8, 10))
util.plot_wave(s_trim, fs, ax=ax[0])
util.plot_wave(env, fs, ax=ax[1])
util.plot_spectrum(pxx, fidx, ax=ax[2])
util.plot_spectrogram(Sxx, extent=ext, ax=ax[3], colorbar=False)
Example #9
def batch_feature_rois_no_verb(rois_list, params_features, path_audio):
    """
    Computes features for a list of files
    
    Parameters
    ----------
        rois_list: list
            List of dictionaries, one per audio file, with keys 'fname'
            (audio file name) and 'rois' (DataFrame with the detected ROIs).
        params_features: dict
            Dictionary with the basic parameters to compute the features:
            'flims', 'opt_spec', and 'opt_shape_str'.
        path_audio : str
            Path to the place where the dataset of audio files is stored

    Returns
    -------
        info_features: dict
            Dictionary with features and all the parameters used to compute
            the features. Included keys: features, parameters_df, opt_shape,
            opt_spectro

    """
    ## TODO: when the time limits are too short, the function has problems
    # load parameters
    flims = params_features['flims']
    opt_spec = params_features['opt_spec']
    opt_shape = opt_shape_presets(params_features['opt_shape_str'])

    # load detection data
    
    features = []
    params = pd.DataFrame()  # fallback in case no file contains detections
    for idx, file in enumerate(rois_list):   
        # unpack file values
        fname = file['fname']
        rois_tf = file['rois']
        #print(idx+1, '/', len(rois_list), fname)    
        
        if rois_tf.empty:
            #print('< No detection on file >')
            features.append({'fname':fname, 'features': pd.DataFrame()})
        else:
            # load materials: sound, spectrogram
            s, fs = sound.load(path_audio+fname)
            im, dt, df, ext = sound.spectrogram(s, fs, nperseg=opt_spec['nperseg'], 
                                                overlap=opt_spec['overlap'], fcrop=flims, 
                                                rescale=False, db_range=opt_spec['db_range'])
            
            # format rois to bbox
            ts = np.arange(ext[0], ext[1], dt)
            f = np.arange(ext[2],ext[3]+df,df)
            rois_bbox = format_rois(rois_tf, ts, f, fmt='bbox')
                
            # roi to image blob
            im_blobs = rois_to_imblobs(np.zeros(im.shape), rois_bbox)
            
            # get features: shape, center frequency
            im = normalize_2d(im, 0, 1)
            bbox, params, shape = shape_features(im, im_blobs, resolution='custom', 
                                                 opt_shape=opt_shape)
            _, cent = centroid(im, im_blobs)
            cent['frequency']= f[round(cent.y).astype(int)]  # y values to frequency
            
            # format rois to time-frequency
            rois_out = format_rois(bbox, ts, f, fmt='tf')
            
            # combine into a single df
            aux_df = pd.concat([rois_out, shape, cent.frequency], axis=1)
            #        aux_df['fname'] = fname
            features.append({'fname':fname, 'features': aux_df})
    
    
    # Arranges the data into a dictionary
    info_features = {'features': features,
                     'parameters_df': params,
                     'opt_shape': opt_shape,
                     'opt_spectro': opt_spec}
    return info_features
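# A hedged usage sketch of batch_feature_rois_no_verb: the audio path, file
# name and parameter values below are illustrative only.
from maad import sound
from maad.rois import find_rois_cwt

path_audio = './audio/'                         # hypothetical folder
s, fs = sound.load(path_audio + 'rec_001.wav')  # hypothetical recording

params_features = {'flims': (1000, 10000),      # frequency band of interest (Hz)
                   'opt_spec': {'nperseg': 512, 'overlap': 0.5, 'db_range': 100},
                   'opt_shape_str': 'med'}      # preset name for opt_shape_presets

rois_list = [{'fname': 'rec_001.wav',
              'rois': find_rois_cwt(s, fs, flims=(1000, 10000), tlen=2, th=0.003)}]

info = batch_feature_rois_no_verb(rois_list, params_features, path_audio)
df_features = info['features'][0]['features']   # per-ROI shape and centroid features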
Example #10
from maad.sound import load, spectrogram
from maad.features import shape_features, plot_shape
from maad.util import format_features, read_audacity_annot, power2dB
from maad.rois import overlay_rois

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn import preprocessing

s, fs = load('../data/spinetail.wav')
rois_tf = read_audacity_annot('../data/spinetail.txt')  ## annotations using Audacity
rois_cr = rois_tf.loc[rois_tf.label=='CRER',]  
rois_sp = rois_tf.loc[rois_tf.label=='SP',]

Sxx_power, ts, f, ext = spectrogram(s, fs)
Sxx_dB = power2dB(Sxx_power, db_range=90) + 96

# Visualize large vocalizations
rois_cr = format_features(rois_cr, ts, f)
overlay_rois(Sxx_dB, rois_cr, **{'extent':ext, 'vmin':0, 'vmax':80})

# Visualize short vocalizations
rois_sp = format_features(rois_sp, ts, f)
overlay_rois(Sxx_dB, rois_sp, **{'extent':ext, 'vmin':0, 'vmax':80})

# Compute and visualize features
shape_cr, params = shape_features(Sxx_dB, resolution='med', rois=rois_cr)
ax = plot_shape(shape_cr.mean(), params)

shape_sp, params = shape_features(Sxx_dB, resolution='med', rois=rois_sp)
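# A hedged sketch of how the shape features could be compared with the PCA
# imported above. Selecting the coefficient columns by the 'shp' prefix and
# standard-scaling before the projection are assumptions for illustration.
X = np.vstack([shape_cr.filter(regex='shp').values,
               shape_sp.filter(regex='shp').values])
X = preprocessing.StandardScaler().fit_transform(X)
Y = PCA(n_components=2).fit_transform(X)

plt.figure(figsize=(6, 5))
plt.scatter(Y[:len(shape_cr), 0], Y[:len(shape_cr), 1], label='CRER')
plt.scatter(Y[len(shape_cr):, 0], Y[len(shape_cr):, 1], label='SP')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()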
Example #11
def compute_rois_features(s, fs, rois_tf, opt_spec, opt_shape, flims):
    """
    Computes shape and central frequency features from signal at specified
    time-frequency limits defined by regions of interest (ROIs)
    
    Parameters
    ----------
        s: ndarray
            Signal to be analysed
        fs: int
            Sampling frequency of the signal
        rois_tf: pandas DataFrame
            Time frequency limits for the analysis. Columns should have at
            least min_t, max_t, min_f, max_f. Can be computed with multiple
            detection methods, such as find_rois_cwt
        opt_spec: dictionary
            Options for the spectrogram with keys: window length 'nperseg' and
            window overlap in percentage 'overlap'
        opt_shape: dictionary
            Options for the filter bank (kbank_opt) and the number of scales (npyr)
        flims: list of 2 scalars
            Minimum and maximum boundary frequency values in Hertz
    
    Returns
    -------
        rois_features: pandas DataFrame
            A dataframe with each column corresponding to a feature
    
    Example
    -------
        s, fs = sound.load('spinetail.wav')        
        rois_tf = find_rois_cwt(s, fs, flims=(3000, 8000), tlen=2, th=0.003)
        opt_spec = {'nperseg': 512, 'overlap': 0.5}
        opt_shape = opt_shape_presets('med')
        features_rois = compute_rois_features(s, fs, rois_tf, opt_spec, 
                                              opt_shape, flims)
        
    """
    im, dt, df, ext = sound.spectrogram(s,
                                        fs,
                                        nperseg=opt_spec['nperseg'],
                                        overlap=opt_spec['overlap'],
                                        fcrop=flims,
                                        rescale=False,
                                        db_range=100)

    # format rois to bbox
    ts = np.arange(ext[0], ext[1], dt)
    f = np.arange(ext[2], ext[3] + df, df)
    rois_bbox = format_rois(rois_tf, ts, f, fmt='bbox')

    # roi to image blob
    im_blobs = rois_to_imblobs(np.zeros(im.shape), rois_bbox)

    # get features: shape, center frequency
    im = normalize_2d(im, 0, 1)
    bbox, params, shape = shape_features(im,
                                         im_blobs,
                                         resolution='custom',
                                         opt_shape=opt_shape)
    _, cent = centroid(im, im_blobs)
    cent['frequency'] = f[round(cent.y).astype(int)]  # y values to frequency

    # format rois to time-frequency
    rois_out = format_rois(bbox, ts, f, fmt='tf')

    # combine into a single df
    rois_features = pd.concat([rois_out, shape, cent.frequency], axis=1)
    return rois_features
Example #12

# sphinx_gallery_thumbnail_path = './_images/sphx_glr_compare_auto_and_manual_rois_selection.png'

import numpy as np
import pandas as pd
from maad import sound, rois, features
from maad.util import (power2dB, plot2d, format_features, read_audacity_annot,
                       overlay_rois, overlay_centroid)

#%%
# First, load an audio file and compute the power spectrogram.
s, fs = sound.load('../../data/cold_forest_daylight.wav')

dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s,
                                           fs,
                                           nperseg=1024,
                                           noverlap=1024 // 2)

# Convert the power spectrogram into dB and add dB_max, the maximum decibel
# range of a 16-bit recording, then display the result
Sxx_db = power2dB(Sxx_power) + dB_max
plot2d(Sxx_db, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})

#%%
# Then, relevant acoustic events are extracted directly from the power
# spectrogram based on a double thresholding technique. The result is a binary
# image called a mask. The double thresholding technique is more sophisticated
# than basic thresholding based on a single value. First, a threshold selects
# pixels with high values (i.e. high acoustic energy); they should belong to an
# acoustic event and are called seeds. From these seeds, we aggregate pixels
# connected to the seed whose value is higher than the second threshold. These
# new pixels become seeds in turn, and the aggregation continues until no more
# pixels can be added, so that each acoustic event is fully segmented.
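#%%
# A minimal sketch of this double thresholding step with maad. The threshold
# values bin_std and bin_per and the minimum ROI size are illustrative and
# should be adapted to the recording.
im_mask = rois.create_mask(Sxx_db, mode_bin='relative', bin_std=8, bin_per=0.5)
# Group the connected pixels of the mask into ROIs and convert the pixel
# coordinates into time-frequency coordinates
im_rois, df_rois = rois.select_rois(im_mask, min_roi=100)
df_rois = format_features(df_rois, tn, fn)
# Overlay the selected ROIs on the dB spectrogram
ax, fig = overlay_rois(Sxx_db, df_rois, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})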