Example #1
def get_default_threshold(audio, window_duration=5.0):
    """Compute a default threshold over all windows in a signal

    Uses compute_smart_threshold in every time window for an
    entire audio signal. Returns the median threshold (after
    removing outliers) and std deviation of those thresholds.

    This can be useful as a reference when computing thresholds later on
    to reject thresholds that seem out of the ordinary

    Parameters
    ==========
    audio : instance of interfaces.audio.AudioSliceInterface
    window_duration : float
        Width in seconds of each window to compute threshold in
    """

    all_thresholds_list = []

    # Compute a threshold independently in each non-overlapping window
    for t in np.arange(0, len(audio) / audio.sampling_rate, window_duration):
        end_time = min(t + window_duration, len(audio) / audio.sampling_rate)

        sliced = audio.time_slice(t, end_time)
        sig = sliced.data
        # Center the signal, bandpass it to the 1-8 kHz band, and average
        # the amplitude envelope across channels before thresholding
        sig = sig - np.mean(sig, axis=0)
        sig = bandpass_filter(sig.T, audio.sampling_rate, 1000, 8000).T
        amp_env = get_amplitude_envelope(sig, audio.sampling_rate, highpass=1000, lowpass=8000)
        amp_env = np.mean(amp_env, axis=1)
        threshold = compute_smart_threshold(amp_env, sampling_rate=audio.sampling_rate)
        all_thresholds_list.append(threshold)

    all_thresholds_list = np.array(all_thresholds_list)

    # Remove outlier thresholds
    clf = LocalOutlierFactor(n_neighbors=5, contamination=0.1, algorithm="kd_tree")
    predicted_outlier = clf.fit_predict(all_thresholds_list[:, None])

    # Return median (non-outlier) threshold and std
    return (
        np.median(all_thresholds_list[predicted_outlier == 1]),
        np.std(all_thresholds_list[predicted_outlier == 1])
    )
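A minimal usage sketch of the function above. The `audio` object and the
3-sigma rejection rule are assumptions for illustration, not part of the
original code; any interfaces.audio.AudioSliceInterface instance would work.

# `audio` is assumed to be an interfaces.audio.AudioSliceInterface instance
# loaded elsewhere
ref_median, ref_std = get_default_threshold(audio, window_duration=5.0)

def is_reasonable_threshold(threshold, n_std=3.0):
    """Reject a per-window threshold far from the reference (assumed rule)."""
    return abs(threshold - ref_median) <= n_std * ref_std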
Example #2
    print("Extracting spectrograms from {} intervals".format(len(intervals)))
    for idx, (t1, t2) in enumerate(intervals):
        print("Working on {}/{} ({:.2f}s elapsed)".format(
            idx + 1, len(intervals),
            time.time() - _time),
              end="\r")

        # Slice the signal with a small buffer of 10 ms on either side
        buffer = 0.01
        t_arr, sig = audio_signal.time_slice(
            max(0, t1 - buffer), min(audio_signal.t_max, t2 + buffer))
        sig = sig - np.mean(sig, axis=0)
        sig = bandpass_filter(sig.T, audio_signal.sampling_rate, 1000, 8000).T

        amp_env = get_amplitude_envelope(sig,
                                         fs=audio_signal.sampling_rate,
                                         lowpass=8000,
                                         highpass=1000)

        # Compute the temporal center of mass of the signal
        center_of_mass = t1 - buffer + np.sum(
            (t_arr * np.sum(amp_env, axis=1))) / np.sum(amp_env)

        # Re-slice the signal centered on its center of mass, with a buffer
        # of 80 ms on either side
        buffer = 0.08
        t_arr, sig = audio_signal.time_slice(
            max(0, center_of_mass - buffer),
            min(audio_signal.t_max, center_of_mass + buffer))
        sig = sig - np.mean(sig, axis=0)
        sig = bandpass_filter(sig.T, audio_signal.sampling_rate, 1000, 8000).T

        specs = []
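The recentering step above is a temporal center of mass: each sample time is
weighted by the amplitude envelope summed across channels. A self-contained
numpy sketch of the same computation on a synthetic envelope (the sampling
rate and burst shape are illustrative):

import numpy as np

fs = 20000                                   # assumed sampling rate in Hz
t_arr = np.arange(0, 0.2, 1.0 / fs)          # 200 ms of window-relative time
# Synthetic single-channel amplitude envelope: a burst centered at 120 ms
amp_env = np.exp(-0.5 * ((t_arr - 0.12) / 0.01) ** 2)[:, None]

# Weight each time point by the envelope summed across channels
center_of_mass = np.sum(t_arr * np.sum(amp_env, axis=1)) / np.sum(amp_env)
print(center_of_mass)                        # ~0.12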
Example #3
def extract_spectrograms(
        audio_signal,
        intervals,
        buffer=0.02,  # buffer around the original interval to keep
        spec_buffer=0.04,  # buffer so that everything has the right padding
):
    """Extract spectrograms from audio_signal denoted by intervals list

    intervals is a list of (t_start, t_end) tuples
    """
    _time = time.time()
    print("Extracting spectrograms from {} intervals".format(len(intervals)))
    # all_calls = []
    all_call_spectrograms = []
    for idx, (t1, t2) in enumerate(intervals):
        print("Working on {}/{} ({:.2f}s elapsed)".format(
            idx + 1, len(intervals),
            time.time() - _time),
              end="\r")

        # Slice the signal with a small buffer (default 20 ms) on either side
        t_arr, sig = audio_signal.time_slice(
            max(0, t1 - buffer), min(audio_signal.t_max, t2 + buffer))
        sig = sig - np.mean(sig, axis=0)
        sig = bandpass_filter(sig.T, audio_signal.sampling_rate, 1000, 8000).T

        amp_env = get_amplitude_envelope(sig,
                                         fs=audio_signal.sampling_rate,
                                         lowpass=8000,
                                         highpass=1000)

        # Compute the temporal center of mass of the signal
        center_of_mass = t1 - buffer + np.sum(
            (t_arr * np.sum(amp_env, axis=1))) / np.sum(amp_env)

        # Re-slice the signal centered on its center of mass, with a buffer
        # (default 40 ms) on either side
        t_arr, sig = audio_signal.time_slice(
            max(0, center_of_mass - spec_buffer),
            min(audio_signal.t_max, center_of_mass + spec_buffer))
        sig = sig - np.mean(sig, axis=0)
        sig = bandpass_filter(sig.T, audio_signal.sampling_rate, 1000, 8000).T

        specs = []
        # all_calls.append(sig)
        for ch in range(sig.shape[1]):
            # Slightly lower resolution on the spectrograms makes this go faster.
            # Increase the params to 1000, 50 for a higher resolution spectrogram.
            _, _, spec, _ = spectrogram(sig[:, ch],
                                        audio_signal.sampling_rate,
                                        500,
                                        100,
                                        min_freq=1000,
                                        max_freq=8000,
                                        cmplx=False)
            specs.append(spec)

        all_call_spectrograms.append(np.array(specs))

    all_call_spectrograms = np.array(all_call_spectrograms)
    # all_calls = np.array(all_calls)
    return all_call_spectrograms
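A hedged usage sketch of extract_spectrograms. The `audio_signal` object is
assumed to be loaded elsewhere and the interval times are illustrative:

# intervals as produced by a thresholding step: (t_start, t_end) pairs in seconds
intervals = [(2.0, 2.4), (5.1, 5.3)]
call_spectrograms = extract_spectrograms(audio_signal, intervals,
                                         buffer=0.02, spec_buffer=0.04)
# With fixed-length slices every entry has the same shape, so the result
# stacks into a single 4D array (interval, channel, and the two spectrogram axes)
print(call_spectrograms.shape)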
Example #4
def threshold_all_events(
        audio_signal,
        window_size=10.0,
        channel=0,
        t_start=None,
        t_stop=None,
        ignore_width=0.05,
        min_size=0.05,
        fuse_duration=0.5,
        threshold_z=3.0,
        highpass=1000,
        lowpass=8000,
        amp_env_mode="broadband"
    ):
    """Find intervals of potential vocalizations periods (in seconds)

    The last two windows are combined in case the duration is not an
    even multiple of the window_size

    amp_env_mode can be "broadband" or "max_zscore"
    """
    sampling_rate = audio_signal.sampling_rate
    signal_duration = len(audio_signal) / sampling_rate
    if window_size is None:
        window_starts = np.array([0.0 if t_start is None else t_start])
        window_stops = np.array([audio_signal.t_max if t_stop is None else t_stop])

    else:
        window_starts = np.arange(0, signal_duration - window_size, window_size)
        window_stops = window_starts + window_size
        window_stops[-1] = signal_duration

        if t_start is not None:
            mask = window_starts >= t_start
        else:
            mask = np.ones_like(window_starts).astype(bool)
        if t_stop is not None:
            mask = mask.astype(bool) & (window_stops <= t_stop)

        window_starts = window_starts[mask]
        window_stops = window_stops[mask]

    last_interval_to_check = None
    all_intervals = []

    for window_start, window_stop in zip(window_starts, window_stops):
        t_arr, window_signal = audio_signal.time_slice(window_start, window_stop)
        window_signal = window_signal - np.mean(window_signal, axis=0)
        window_signal = bandpass_filter(window_signal.T, sampling_rate, highpass, lowpass).T
        amp_env = get_amplitude_envelope(
            window_signal[:, channel],
            sampling_rate,
            highpass=highpass,
            lowpass=lowpass,
            mode=amp_env_mode
        )

        threshold = compute_smart_threshold(
            amp_env,
            sampling_rate=sampling_rate,
            z=threshold_z
        )

        intervals = threshold_events(
            amp_env,
            threshold,
            sampling_rate=sampling_rate,
            ignore_width=ignore_width,
            min_size=min_size,
            fuse_duration=fuse_duration
        )

        # Merge intervals across window boundaries: an interval held over from
        # the previous window may continue into the first interval of this one
        if last_interval_to_check is not None:
            if not len(intervals):
                all_intervals.append(last_interval_to_check)
            elif intervals[0][0] < (0.5 * sampling_rate):
                all_intervals.append((
                    last_interval_to_check[0],
                    intervals[0][1] / sampling_rate + window_start
                ))
                intervals = intervals[1:]
            else:
                all_intervals.append(last_interval_to_check)
                all_intervals.append((
                    intervals[0][0] / sampling_rate + window_start,
                    intervals[0][1] / sampling_rate + window_start
                ))
                intervals = intervals[1:]
            last_interval_to_check = None

        for i0, i1 in intervals:
            if i1 == len(window_signal):
                last_interval_to_check = (
                    i0 / sampling_rate + window_start,
                    i1 / sampling_rate + window_start
                )
                break
            all_intervals.append((
                i0 / sampling_rate + window_start,
                i1 / sampling_rate + window_start
            ))

    if last_interval_to_check is not None:
        all_intervals.append(last_interval_to_check)

    return all_intervals
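A sketch of how the detected intervals might feed the spectrogram extraction
above. The `audio_signal` object is assumed to be loaded elsewhere and the
parameter values are illustrative:

intervals = threshold_all_events(
    audio_signal,
    window_size=10.0,
    channel=0,
    threshold_z=3.0,
)
print("Detected {} candidate vocalization periods".format(len(intervals)))
call_spectrograms = extract_spectrograms(audio_signal, intervals)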
Example #5
def split_individual_events(
        signal,
        sampling_rate,
        expected_call_max_duration=1.0,
        max_tries=10,
        scale_factor=1.25,
        amp_env_mode="broadband",
    ):
    """Divide a signal interval into individual putative events

    This function assumes that the input is a signal that already contains a lot
    of sound (detected by thresholding) and wants to split it up into individual
    events by creating a second, more conservative threshold.

    It is recommended to include some padding in the signal so that the detector
    can better find a baseline (e.g. include the period between 1.0s and 4.0s
    for a vocal period detected at 2.0s to 3.0s)

    TODO: do this across two channels?
    """
    if signal.ndim == 1:
        signal = signal - np.mean(signal)
    else:
        signal = (signal - np.mean(signal, axis=0))[:, 0]

    amp_env = get_amplitude_envelope(
        signal,
        sampling_rate,
        highpass=1000,
        lowpass=8000,
        mode=amp_env_mode,
    )

    threshold = compute_smart_threshold(amp_env, sampling_rate)
    # Compute intervals within this period, preferentially separating sounds
    # that are separated by more than 20ms of silence.
    # Then, gradually raise the threshold until the longest period detected is
    # no greater than the defined max_length (default 1s)

    idx = 0
    while idx < max_tries:
        intervals = threshold_events(
            amp_env,
            threshold,
            polarity=1,
            sampling_rate=sampling_rate,
            ignore_width=0.02,
            min_size=0.01,
            fuse_duration=0.02,
        )
        durations = [np.diff(x) / sampling_rate for x in intervals]
        if len(durations) and np.max(durations) > expected_call_max_duration:
            threshold *= scale_factor
        else:
            break

        idx += 1

    if not len(intervals):
        return [[0, len(signal)]]
    else:
        return intervals
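A usage sketch following the docstring's padding recommendation. The
`audio_signal` object, the tuple-returning time_slice call, and the detected
times are assumptions for illustration:

# A vocal period was detected from 2.0 s to 3.0 s; slice it with 1 s of
# padding on either side so the detector can find a baseline
t_arr, sig = audio_signal.time_slice(1.0, 4.0)
events = split_individual_events(sig, audio_signal.sampling_rate)
# Convert sample indices (relative to the padded slice) back to absolute times
for i0, i1 in events:
    print(1.0 + i0 / audio_signal.sampling_rate,
          1.0 + i1 / audio_signal.sampling_rate)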