def batch_find_rois(flist, params_detections, path_audio):
    """
    Find regions of interest (ROIs) in a batch of audio files.

    Every file named in ``flist['fname']`` is loaded with ``sound.load`` and
    scanned with ``find_rois_cwt``. Detections shorter than 25% of ``tlen``
    are discarded before the result is stored.

    Parameters
    ----------
    flist : pandas DataFrame
        Table of audio filenames to process. Only the column 'fname' is read.
    params_detections : dict
        Dictionary with the basic parameters to feed find_rois_cwt:
        'flims', 'tlen', and 'th'.
    path_audio : str
        Path to the place where the audio files are stored. It is
        concatenated as is with each file name, so it must end with a path
        separator.

    Returns
    -------
    info_detections : dict
        Dictionary with two keys: 'detections', a list with one
        ``{'fname': ..., 'rois': ...}`` entry per file, and 'parameters',
        the ``params_detections`` dictionary that was passed in.
    """
    # load parameters
    flims = params_detections['flims']
    tlen = params_detections['tlen']
    th = params_detections['th']

    detections = []
    for idx, fname in enumerate(flist['fname']):
        print(idx + 1, '/', len(flist), fname)
        s, fs = sound.load(path_audio + fname)
        rois = find_rois_cwt(s, fs, flims, tlen, th)
        if not rois.empty:
            # Filter rois shorter than 25% of tlen. A boolean mask is used
            # instead of positional indices so that the filter does not
            # depend on the index labels of the returned table.
            too_short = (rois.max_t - rois.min_t) < tlen * 0.25
            rois = rois.loc[~too_short].reset_index(drop=True)
        # save to list
        detections.append({'fname': fname, 'rois': rois})

    info_detections = {
        'detections': detections,
        'parameters': params_detections,
    }
    return info_detections
mathematical morphology tools...

Dependencies: To execute this example you will need to have installed the 
scikit-image, scikit-learn and pandas Python packages.

import numpy as np
import pandas as pd
from maad import sound, rois, features
from maad.util import power2dB, plot2D, format_features, read_audacity_annot

# First, load an audio file and compute the power spectrogram.
s, fs = sound.load('../data/cold_forest_daylight.wav')

# Crop limits handed to the spectrogram below: (t0, t1) is passed as `tcrop`
# and (f0, f1) as `fcrop` (presumably seconds and Hz -- TODO confirm).
t0 = 0
t1 = 20
f0 = 100
f1 = 10000
dB_max = 96

# NOTE(review): `fs` is not passed to `sound.spectrogram` here, although the
# other calls in this file pass it as second argument -- confirm this call.
Sxx_power, tn, fn, ext = sound.spectrogram(s,
                                           noverlap=1024 // 2,
                                           fcrop=(f0, f1),
                                           tcrop=(t0, t1))

# Convert the power spectrogram into dB, add dB_max which is the maximum decibel
from maad.sound import load, spectrogram
from maad.features import shape_features, plot_shape, centroid_features, overlay_centroid
from maad.util import read_audacity_annot, linear_scale, format_features, get_unimode, running_mean
from maad.rois import overlay_rois, create_mask, select_rois, find_rois_cwt, remove_background, median_equalizer
from skimage import morphology
import numpy as np
import pandas as pd

###=============== load audio =================
# Load the recording and the annotation file that goes with it.
s, fs = load('./data/spinetail.wav')
rois = read_audacity_annot(
    './data/spinetail.txt')  ## annotations using Audacity

###=============== compute spectrogram =================
Sxx, tn, fn, ext = spectrogram(s, fs)
# Convert the spectrogram values to decibels (10 * log10).
Sxx = 10 * np.log10(Sxx)

# Format the annotations using the time (tn) and frequency (fn) vectors of
# the spectrogram.
rois = format_features(rois, tn, fn)

###=============== from Audacity =================

### with all labels
# Draw the annotated regions on top of the dB spectrogram.
ax, fig = overlay_rois(Sxx, ext, rois, vmin=-120, vmax=20)

# Compute and visualize features
In an audio signal, regions of interest are usually regions with high density of energy. The function find_rois_cwt allows finding regions of interest in the signal giving very simple and intuitive parameters: temporal length and frequency limits. This segmentation can be seen as a coarse detection process, the starting point of more advanced classification methods.

The following sound example has two main different sound types in the foreground:

- An accelerating trill between 4.5 and 8 kHz lasting approximately 2 seconds
- A fast descending chirp between 8 and 12 kHz lasting approximately 0.1 seconds

#%% Load an audio file and compute the spectrogram for visualization.

from maad import sound
from maad.rois import find_rois_cwt
from maad.util import power2dB, plot2D

# Load the example recording and compute its spectrogram.
s, fs = sound.load('../../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)
# Convert to dB using a 100 dB range, then add 100 as offset.
Sxx_db = power2dB(Sxx, db_range=100) + 100
# Display the dB spectrogram using the extent returned by the spectrogram.
plot2D(Sxx_db, **{'extent': ext})

# Detect the accelerating trill
# -----------------------------
# The accelerating trill is the song of a small neotropical bird, Cranioleuca erythrops. This song can be detected on the recording using the function find_rois_cwt and setting frequency limits flims=(4500,8000) and temporal length of signal tlen=2.

_ = find_rois_cwt(s,
                  flims=(4500, 8000),
df_indices = pd.DataFrame()
df_indices_per_bin = pd.DataFrame()

for index, row in df.iterrows():

    # get the full filename of the corresponding row
    fullfilename = row['file']
    # Save file basename
    path, filename = os.path.split(fullfilename)

    #### Load the original sound (16bits) and get the sampling frequency fs
        wave, fs = sound.load(filename=fullfilename,

        # Delete the row if the file does not exist or raise a value error (i.e. no EOF)
        df.drop(index, inplace=True)
    """ =======================================================================
                     Computation in the time domain 

    # Parameters of the audio recorder. They are not mandatory, but they make
    # it possible to compute the sound pressure level of the audio file
    # (dB SPL) as a sonometer would do.
    S = -35  # Sensbility microphone-35dBV (SM4) / -18dBV (Audiomoth)
    G = 26 + 16  # Amplification gain (26dB (SM4 preamplifier))
                       remove_background_along_axis, sharpness)
import numpy as np

from timeit import default_timer as timer

import matplotlib.pyplot as plt

# Load and plot the spectrogram of the original audio file
# --------------------------------------------------------
# First, we load the audio file and take its spectrogram.
# The linear spectrogram is then transformed into dB. The dB range is 96 dB,
# which is the maximum dB range value for a 16-bit audio recording. We add
# 96 dB in order to have only positive values in the spectrogram.
# Load the recording and compute the spectrogram, cropped with
# fcrop=[0,20000] and tcrop=[0,60].
s, fs = load('../../data/tropical_forest_morning.wav')
Sxx, tn, fn, ext = spectrogram(s, fs, fcrop=[0,20000], tcrop=[0,60])
# Convert to dB with a 96 dB range and add 96 as offset.
Sxx_dB = power2dB(Sxx, db_range=96) + 96
# The colour scale goes from the median value to 40 above the median.
plot2d(Sxx_dB, extent=ext, title='original',
       vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40)

print ("Original sharpness : %2.3f" % sharpness(Sxx_dB))

# Test different methods to remove stationary background noise
# ------------------------------------------------------------
# Test the function "remove_background"
# The call is timed with `timer` (timeit.default_timer).
start = timer()
X1, noise_profile1, _ = remove_background(Sxx_dB)
elapsed_time = timer() - start
print("---- test remove_background -----")
def _cut_padded_window(s, fs, tlims):
    """Cut the window tlims (seconds) out of s, zero-padding outside the recording."""
    t_start, t_stop = tlims
    rec_length = len(s) / fs
    pieces = []
    if t_start < 0:
        # window starts before the recording: add silence at begin
        pieces.append(np.zeros(int(abs(t_start) * fs)))
        i_start = 0
    else:
        i_start = int(t_start * fs)
    if t_stop > rec_length:
        # window ends after the recording: add silence at end
        pieces.append(s[i_start:])
        pieces.append(np.zeros(int((t_stop - rec_length) * fs)))
    else:
        pieces.append(s[i_start:int(t_stop * fs)])
    return np.concatenate(pieces)


def format_trainds(df, flims, wl, path_audio):
    """
    Arranges all the training data into a dictionary for easy and compact access.

    For every region of interest, the audio file is loaded, band-pass
    filtered and a window of ``wl`` seconds centred on the region is cut out.
    Where the window extends beyond the recording it is padded with silence
    (zeros), at the beginning, at the end, or both.

    Parameters
    ----------
    df : pandas DataFrame
        DataFrame with information on the regions of interest to be arranged.
        The columns fname, min_t, max_t, min_f, max_f and cluster are read,
        as well as every column whose name starts with 'shp' and the column
        'frequency' when present. A column 'tlen' (max_t - min_t) is added
        to ``df`` in place.
    flims : tuple or list
        Minimum and maximum frequency limits of the band pass filter. This
        is used to filter unwanted sounds and improve the manual analysis.
    wl : int or float
        Window length (in seconds) of each region of interest. While the
        regions have a specified duration, with this argument it is possible
        to increase the window of observation, allowing to have a wider
        context to analyse the audio. Recommended minimum 2 seconds.
    path_audio : str
        Path to the directory where all the raw audio files are stored. It
        is concatenated as is with each file name.

    Returns
    -------
    train_data : dict
        A dictionary with the keys: roi_info, shape_features, label, audio,
        segments and maad_label.
    """
    print('Aligning ROIs, number of observations:', len(df))
    df['tlen'] = df.max_t - df.min_t
    audiolist = list()
    for idx, roi in df.iterrows():
        fname_wav = path_audio + roi.fname
        # define tlimits with window length, centred on the roi
        centre = roi.min_t + (roi.max_t - roi.min_t) / 2
        tlims = (centre - wl / 2, centre + wl / 2)
        s, fs = sound.load(fname_wav)
        s = sound.select_bandwidth(s, fs, lfc=flims[0], hfc=flims[1])
        # if time limits are outside the recording, silence is added
        audiolist.append(_cut_padded_window(s, fs, tlims))

    ## write segments for manual annotations
    # onset/offset of the roi relative to the start of its wl-long window
    onset = (wl / 2) - (df.tlen / 2)
    offset = (wl / 2) + (df.tlen / 2)
    seg = pd.DataFrame({'onset': onset, 'offset': offset})
    seg['label'] = 'NA'
    ## assign to object and save
    train_data = dict()
    idx_features = df.columns.str.startswith('shp') | (df.columns == 'frequency')
    train_data['roi_info'] = df[['fname', 'min_t', 'max_t', 'min_f', 'max_f']]
    train_data['shape_features'] = df.loc[:, idx_features]
    train_data['label'] = seg.label
    train_data['audio'] = audiolist
    train_data['segments'] = seg[['onset', 'offset']]
    train_data['maad_label'] = df.cluster
    return train_data
def batch_predict_rois(flist, tuned_clfs, params, path_audio_db='./'):
    """
    Predict the labels of rois in a list of audio files.

    Parameters
    ----------
    flist: pandas DataFrame
        list of audio filenames to be analysed. Column name must be 'fname'
    tuned_clfs: dict
        data structure with tuned classifiers by grid search or random search.
        The keys 'rf', 'adb' and 'svm' are read, and the ``best_estimator_``
        of each entry is used.
    params: dict
        data structure with the same parameters used to train the classifiers.
        Keys to be included: 'sample_rate_wav', 'flims', 'tlen', 'th',
        'opt_spec', 'opt_shape_str'
    path_audio_db: str, default current directory
        path pointing to the directory where the audio files are located.
        Note that all files in flist must be in the same directory

    Returns
    -------
    predictions: dict
        data structure with name of audio files as keys. Each element in the
        dictionary has a DataFrame with predictions for every region interest
        found. Predictions are given as probabilities for three different
        classifiers, namely Random Forest ('rf'), Adaboost ('adb') and Support
        Vector Machines ('svm'). Files without any detection are stored with
        the value -1 instead of a DataFrame.
    """
    t_start = time.time()  # compute processing time
    # Load params and variables
    # The insertion order (rf, adb, svm) is the column order of the output.
    classifiers = {
        'rf': tuned_clfs['rf'].best_estimator_,
        'adb': tuned_clfs['adb'].best_estimator_,
        'svm': tuned_clfs['svm'].best_estimator_,
    }
    flims = params['flims']
    tlen = params['tlen']
    th = params['th']
    opt_spec = params['opt_spec']
    opt_shape = opt_shape_presets(params['opt_shape_str'])
    sample_rate_std = params['sample_rate_wav']
    # Batch: compute rois, features and predict through files
    predictions = dict()
    for idx, fname in enumerate(flist['fname']):
        print(idx+1, '/', len(flist), fname)
        s, fs = sound.load(path_audio_db+fname)
        # Check sampling frequency on file: resample only on a mismatch
        if fs != sample_rate_std:
            print('Warning: sample rate mismatch, resampling audio file to standard',
                  sample_rate_std, 'Hz')
            s = resample(s, fs, sample_rate_std, res_type='kaiser_fast')
            fs = sample_rate_std
        rois = find_rois_cwt(s, fs, flims, tlen, th)
        if rois.empty:
            predictions[fname] = -1
            continue
        # filter rois shorter than 25% of tlen
        too_short = (rois.max_t - rois.min_t) < tlen*0.25
        rois = rois.loc[~too_short].reset_index(drop=True)
        if rois.empty:
            print('< No detection on file >')
            predictions[fname] = -1
            continue
        # compute features
        rois_features = compute_rois_features(s, fs, rois, opt_spec, opt_shape, flims)
        # predict: only the columns whose name starts with 'shp' are used
        X = rois_features.loc[:, rois_features.columns.str.startswith('shp')]
        pred_frames = [
            pd.DataFrame(data=clf.predict_proba(X),
                         columns=[label + '_' + tag
                                  for label in clf.classes_.astype('str')])
            for tag, clf in classifiers.items()
        ]
        # save to variable
        predictions[fname] = pd.concat([rois] + pred_frames, axis=1)
    t_stop = time.time()  # compute processing time
    print('Batch process completed. Processing time: ', np.round(t_stop - t_start, 2), 's')
    return predictions
def batch_feature_rois_no_verb(rois_list, params_features, path_audio):
    """
    Computes features for a list of files, without printing progress.

    Parameters
    ----------
    rois_list : list of dict
        One entry per audio file, each with the keys 'fname' (file name) and
        'rois' (DataFrame with the regions of interest of that file).
    params_features : dict
        Dictionary with the parameters used to compute the features:
        'flims', 'opt_spec' and 'opt_shape_str'.
    path_audio : str
        Path to the place where the dataset of audio files are stored. It is
        concatenated as is with each file name.

    Returns
    -------
    info_features : dict
        Dictionary with features and all the parameters used to compute the
        features. Included keys: features, parameters_df, opt_shape,
        opt_spectro. 'features' is a list with one
        ``{'fname': ..., 'features': DataFrame}`` entry per file (an empty
        DataFrame when the file has no rois). 'parameters_df' holds the
        second value returned by ``shape_features`` for the last file that
        had rois, or None when no file had any.
    """
    ## TODO: when the time limits are too short, the function has problems
    # load parameters
    flims = params_features['flims']
    opt_spec = params_features['opt_spec']
    opt_shape = opt_shape_presets(params_features['opt_shape_str'])

    # Stays None when every file is empty, so the return below never fails
    # on an unbound name.
    params = None

    # load detection data
    features = []
    for file in rois_list:
        # unpack file values
        fname = file['fname']
        rois_tf = file['rois']
        if rois_tf.empty:
            # no detection on file
            features.append({'fname': fname, 'features': pd.DataFrame()})
            continue
        # load materials: sound, spectrogram
        s, fs = sound.load(path_audio+fname)
        im, dt, df_freq, ext = sound.spectrogram(s, fs, nperseg=opt_spec['nperseg'],
                                                 overlap=opt_spec['overlap'], fcrop=flims,
                                                 rescale=False, db_range=opt_spec['db_range'])
        # format rois to bbox
        ts = np.arange(ext[0], ext[1], dt)
        f = np.arange(ext[2], ext[3]+df_freq, df_freq)
        rois_bbox = format_rois(rois_tf, ts, f, fmt='bbox')
        # roi to image blob
        im_blobs = rois_to_imblobs(np.zeros(im.shape), rois_bbox)
        # get features: shape, center frequency
        im = normalize_2d(im, 0, 1)
        # NOTE(review): the original call was cut after resolution='custom';
        # opt_shape is assumed to be the missing argument -- confirm.
        bbox, params, shape = shape_features(im, im_blobs, resolution='custom',
                                             opt_shape=opt_shape)
        _, cent = centroid(im, im_blobs)
        cent['frequency'] = f[cent.y.round().astype(int)]  # y values to frequency
        # format rois to time-frequency
        rois_out = format_rois(bbox, ts, f, fmt='tf')
        # combine into a single df
        aux_df = pd.concat([rois_out, shape, cent.frequency], axis=1)
        features.append({'fname': fname, 'features': aux_df})
    # Arranges the data into a dictionary
    info_features = {'features': features,
                     'parameters_df': params,
                     'opt_shape': opt_shape,
                     'opt_spectro': opt_spec}
    return info_features
Unsupervised learning algorithms search for structures or patterns in a dataset without requiring labels. In the context of ecoacoustics, this approach can be useful to draw inferences when manual labelling is inaccessible or too expensive. For example, unsupervised learning can be used to estimate the animal acoustic diversity [1], combine human-reasoning and automated procedures to build reference libraries, and find hidden structures in the soundscapes.

In this example, we will use unsupervised learning to automatically annotate multiple sounds in an audio recording.  The process follows four main steps. We will (i) find sounds that can be delimited in time and frequency, here defined as regions of interest (ROIs), (ii) characterize ROIs by features in the time-frequency domain using 2D wavelets [2], (iii) use t-SNE, a dimensionality reduction algorithm, to reduce the dimensionality of the data [3], and (iv) automatically form homogeneous groups using DBSCAN [4]. We will use a real audio file recorded with an omnidirectional microphone. This audio has a poor signal-to-noise ratio, which is typical of automated audio recordings.

**Dependencies**: This example requires the Python package scikit-learn v0.24 or greater.
# sphinx_gallery_thumbnail_path = './_images/sphx_glr_plot_unsupervised_sound_classification_004.png'
import numpy as np
import matplotlib.pyplot as plt
from maad import sound, features, rois
from maad.util import power2dB, plot2d, format_features, overlay_rois

# Start by loading an example audio file. We will remove low frequency ambient noise with a highpass filter and then compute the spectrogram.

s, fs = sound.load('../../data/rock_savanna.wav')
# Highpass filter with fcut=100 and forder=3.
s_filt = sound.select_bandwidth(s, fs, fcut=100, forder=3, ftype='highpass')

db_max = 70  # used to define the range of the spectrogram
Sxx, tn, fn, ext = sound.spectrogram(s_filt, fs, nperseg=1024, noverlap=512)
# Convert to dB over a db_max range and add db_max as offset.
Sxx_db = power2dB(Sxx, db_range=db_max) + db_max
plot2d(Sxx_db, **{'extent': ext})

# 1. Find regions of interest
# ---------------------------
# To find regions of interest in the spectrogram, we will remove stationary background noise and then find isolated sounds using a double threshold method. Small ROIs due to noise in the signal will be removed.

# Only the first of the three values returned by remove_background is kept.
Sxx_db_rmbg, _, _ = sound.remove_background(Sxx_db)
Sxx_db_smooth = sound.smooth(Sxx_db_rmbg, std=1.2)
im_mask = rois.create_mask(im=Sxx_db_smooth,
# Load packages and set variables.
import glob
import matplotlib.pyplot as plt
from maad import sound, util

fpath = '../../data/indices/'  # location of audio files
sample_len = 3  # length in seconds of each audio slice

# Build a long list of audio slices of length `sample_len`.
# glob returns the files in arbitrary order, so the list is sorted to
# concatenate the slices in a reproducible (file name) order.
flist = sorted(glob.glob(fpath + '*.wav'))
long_wav = list()
for fname in flist:
    s, fs = sound.load(fname)
    s = sound.trim(s, fs, 0, sample_len)
    # keep the trimmed slice; without this the list passed to the crossfade
    # below would stay empty
    long_wav.append(s)

# Combine all audio recordings applying a crossfade and compute the spectrogram of
# the resulting mixed audio.
long_wav = util.crossfade_list(long_wav, fs, fade_len=0.5)
Sxx, tn, fn, ext = sound.spectrogram(long_wav,

# Display the spectrogram. We can see clearly the bird chorus at dawn (5-10 h) and
In this example, we will use unsupervised learning to automatically annotate multiple sounds in an audio recording.  The process follows four main steps. We will (i) find sounds that can be delimited in time and frequency, here defined as regions of interest (ROIs), (ii) characterize ROIs by features in the time-frequency domain using 2D wavelets [2], (iii) use t-SNE, a dimensionality reduction algorithm, to reduce the dimensionality of the data [3], and (iv) automatically form homogeneous groups using DBSCAN [4]. We will use a real audio file recorded with an omnidirectional microphone. This audio has a poor signal-to-noise ratio, which is typical of automated audio recordings.

Note: To execute this example you will need to have installed the Python packages
matplotlib, scikit-image and scikit-learn.
# sphinx_gallery_thumbnail_path = '../_images/sphx_glr_plot_unsupervised_sound_classification_004.png'
import numpy as np
import matplotlib.pyplot as plt
from maad import sound, features, rois
from maad.util import power2dB, plot2D, format_features

# Start by loading an example audio file. Ambient noise will be removed with a highpass filter and then we will compute the spectrogram.

# NOTE(review): absolute, user-specific path -- the file must exist at this
# exact location for the example to run.
s, fs = sound.load('/Users/jsulloa/Downloads/rock_savana.wav')
# Highpass filter with fcut=100 and forder=3.
s_filt = sound.select_bandwidth(s, fs, fcut=100, forder=3, ftype='highpass')

db_max = 70  # used to define the range of the spectrogram
Sxx, tn, fn, ext = sound.spectrogram(s_filt, fs, nperseg=1024, noverlap=512)
# Convert to dB over a db_max range and add db_max as offset.
Sxx_db = power2dB(Sxx, db_range=db_max) + db_max
plot2D(Sxx_db, **{'extent': ext})

# 1. Find regions of interest
# ---------------------------
# To find regions of interest in the spectrogram, we will remove stationary background noise and then find isolated sounds using a double threshold method. Small ROIs due to noise in the signal will be removed.

# Only the first of the three values returned by remove_background is kept.
Sxx_db_rmbg, _, _ = sound.remove_background(Sxx_db)
Sxx_db_smooth = sound.smooth(Sxx_db_rmbg, std=1.2)
im_mask = rois.create_mask(im=Sxx_db_smooth,

from maad.sound import load, spectrogram
from maad.features import shape_features, plot_shape
from maad.util import format_features, read_audacity_annot, power2dB
from maad.rois import overlay_rois

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn import preprocessing

# Load the recording and its annotations, then split the annotations by label.
s, fs = load('../data/spinetail.wav')
rois_tf = read_audacity_annot('../data/spinetail.txt')  ## annotations using Audacity
rois_cr = rois_tf.loc[rois_tf.label=='CRER',]  
rois_sp = rois_tf.loc[rois_tf.label=='SP',]

Sxx_power, ts, f, ext = spectrogram(s, fs)
# NOTE(review): db_range is 90 but the offset added is 96 -- confirm that the
# two values are meant to differ.
Sxx_dB = power2dB(Sxx_power, db_range=90) + 96

# Visualize large vocalizations
# (annotations labelled 'CRER')
rois_cr = format_features(rois_cr, ts, f)
overlay_rois(Sxx_dB, rois_cr, **{'extent':ext, 'vmin':0, 'vmax':80})

# Visualize short vocalizations
# (annotations labelled 'SP')
rois_sp = format_features(rois_sp, ts, f)
overlay_rois(Sxx_dB, rois_sp, **{'extent':ext, 'vmin':0, 'vmax':80})