Пример #1
0
def main():
    """
    Time spectrogram computation over the audio file at `FILE_PATH`.

    The file is read in chunks sized to hold a whole number of
    spectra (about `APPROXIMATE_READ_SIZE` samples each), a
    spectrogram is computed for every chunk, and the read position
    is advanced by the number of spectra obtained times the hop
    size. When fewer than one window of samples remains, the
    elapsed time and the processing rate relative to real time
    are printed.
    """

    began = time.time()

    window_len = int(round(WINDOW_SIZE * SAMPLE_RATE))
    hop_len = int(round(HOP_SIZE * SAMPLE_RATE))
    fft_len = tfa_utils.get_dft_size(window_len)

    for label, value in (
            ('window size', window_len),
            ('hop size', hop_len),
            ('DFT size', fft_len)):
        print(label, value)

    # Size a full read so that it holds a whole number of spectra.
    spectra_per_read = int(round(APPROXIMATE_READ_SIZE / hop_len))
    full_read_len = (spectra_per_read - 1) * hop_len + window_len

    taper = HannWindow(window_len).samples
    reader = WaveAudioFileReader(str(FILE_PATH), mono_1d=True)
    total_len = reader.length

    position = 0
    while True:

        remaining = total_len - position

        # Stop when what is left cannot fill even one window.
        if remaining < window_len:
            break

        chunk = reader.read(position, min(full_read_len, remaining))
        spectra = tfa_utils.compute_spectrogram(
            chunk, taper, hop_len, fft_len)

        # Resume reading where the next unprocessed spectrum starts.
        position += len(spectra) * hop_len

    elapsed = time.time() - began
    audio_duration = reader.length / reader.sample_rate
    speedup = audio_duration / elapsed

    message = (
        'Processed {:.1f} seconds of audio in {:.1f} seconds, {:.1f} '
        'times faster than real time.')
    print(message.format(audio_duration, elapsed, speedup))
Пример #2
0
 def _create_signal_processor(self):
     """
     Assemble the signal processor chain for this detector.

     The chain consists, in order, of a spectrograph, a frequency
     integrator, a power filter, and a divider. Each stage is
     configured from `self.settings`, and each stage after the first
     is given the output sample rate of the stage before it.

     Returns the `_SignalProcessorChain` wrapping the four processors.
     """

     settings = self.settings
     input_rate = self._input_sample_rate

     # Spectrograph. The hop size setting is a percentage of the
     # window size setting, so it is scaled before conversion.
     window_size = _seconds_to_samples(settings.window_size, input_rate)
     hop_duration = settings.window_size * settings.hop_size / 100
     hop_size = _seconds_to_samples(hop_duration, input_rate)
     dft_size = tfa_utils.get_dft_size(window_size)
     spectrograph = _Spectrograph(
         'Spectrograph', settings.window_type, window_size, hop_size,
         dft_size, input_rate)

     # Frequency integrator over the configured frequency range.
     bin_size = spectrograph.bin_size
     start_bin = _get_start_bin_num(settings.start_frequency, bin_size)
     end_bin = _get_end_bin_num(settings.end_frequency, bin_size)
     integrator = _FrequencyIntegrator(
         'Frequency Integrator', start_bin, end_bin,
         spectrograph.output_sample_rate)

     # Power filter, running at the integrator's output rate.
     power_filter = self._create_power_filter(
         integrator.output_sample_rate)

     # Divider, with its delay expressed in power filter output samples.
     filtered_rate = power_filter.output_sample_rate
     delay = _seconds_to_samples(settings.delay, filtered_rate)
     divider = _Divider('Divider', delay, filtered_rate)

     return _SignalProcessorChain(
         'Detector',
         [spectrograph, integrator, power_filter, divider],
         self._input_sample_rate,
         self._debugging_listener)
def main():
    """
    Time in-band power computation over the audio file at `FILE_PATH`.

    The file is read in chunks sized to hold a whole number of
    spectra (about `APPROXIMATE_READ_SIZE` samples each). For every
    chunk a spectrogram is computed and passed to
    `compute_inband_powers`, and the read position is advanced by
    the number of results obtained times the hop size. When fewer
    than one window of samples remains, the elapsed time and the
    processing rate relative to real time are printed.
    """

    began = time.time()

    window_len = int(round(WINDOW_SIZE * SAMPLE_RATE))
    hop_len = int(round(HOP_SIZE * SAMPLE_RATE))
    fft_len = tfa_utils.get_dft_size(window_len)

    for label, value in (
            ('window size', window_len),
            ('hop size', hop_len),
            ('DFT size', fft_len)):
        print(label, value)

    # Size a full read so that it holds a whole number of spectra.
    spectra_per_read = int(round(APPROXIMATE_READ_SIZE / hop_len))
    full_read_len = (spectra_per_read - 1) * hop_len + window_len

    taper = HannWindow(window_len).samples
    reader = WaveAudioFileReader(str(FILE_PATH), mono_1d=True)
    total_len = reader.length

    position = 0
    while True:

        remaining = total_len - position

        # Stop when what is left cannot fill even one window.
        if remaining < window_len:
            break

        chunk = reader.read(position, min(full_read_len, remaining))
        spectra = tfa_utils.compute_spectrogram(
            chunk, taper, hop_len, fft_len)
        powers = compute_inband_powers(spectra)

        # Resume reading where the next unprocessed spectrum starts.
        position += len(powers) * hop_len

    elapsed = time.time() - began
    audio_duration = reader.length / reader.sample_rate
    speedup = audio_duration / elapsed

    message = (
        'Processed {:.1f} seconds of audio in {:.1f} seconds, {:.1f} '
        'times faster than real time.')
    print(message.format(audio_duration, elapsed, speedup))
Пример #4
0
def _test_stft():
    """
    Print dB-scaled `tf.signal.stft` power spectrograms of test waveforms.

    For each of several window sizes, a waveform obtained from
    `_create_sinusoid` is analyzed with the frame length and frame
    step both equal to the window size and with `None` passed as the
    window function. The squared magnitudes are scaled by
    `1 / (window_size / 2) ** 2`, converted to decibels with an
    offset of 100, and printed together with the window size.
    """

    sample_rate = 24000
    window_sizes = (8, 12, 16, 20, 24, 28, 32, 48, 64)

    # Added to the powers before the log is taken, so that a power
    # of zero does not yield negative infinity.
    epsilon = 1e-10

    # Multiplying a natural log by this factor gives ten times the
    # base ten log.
    decibel_scale_factor = 10 / math.log(10)

    for window_size in window_sizes:

        # The STFT input gets a leading axis of size one.
        waveforms = tf.expand_dims(
            _create_sinusoid(window_size, sample_rate), 0)

        stft = tf.signal.stft(
            waveforms,
            frame_length=window_size,
            frame_step=window_size,
            fft_length=tfa_utils.get_dft_size(window_size),
            window_fn=None)

        power = tf.abs(stft) ** 2
        power = power * (1 / (window_size / 2) ** 2)

        decibels = 100 + decibel_scale_factor * tf.math.log(power + epsilon)

        print(window_size, decibels)
Пример #5
0
def _get_low_level_preprocessing_settings(mode, settings):
    """
    Convert high-level preprocessing settings to index and size values.

    Durations in `settings` are converted with
    `signal_utils.seconds_to_frames` at the waveform sample rate, and
    frequencies are converted with `tfa_utils.get_dft_bin_num`.

    Returns the tuple `(time_start_index, time_end_index, window_size,
    hop_size, dft_size, freq_start_index, freq_end_index)`. The time
    start index is zero when `mode` is `DATASET_MODE_INFERENCE`. The
    frequency end index is one more than the bin number of the end
    frequency.
    """

    sample_rate = settings.waveform_sample_rate
    to_frames = signal_utils.seconds_to_frames

    # Time slicing. In inference mode the slice starts at index zero
    # and the start time setting is not consulted.
    time_start_index = (
        0 if mode == DATASET_MODE_INFERENCE
        else to_frames(settings.waveform_start_time, sample_rate))
    time_end_index = time_start_index + to_frames(
        settings.waveform_duration, sample_rate)

    # Spectrogram. The hop size setting is a percentage of the
    # window size setting.
    window_size = to_frames(settings.spectrogram_window_size, sample_rate)
    hop_fraction = settings.spectrogram_hop_size / 100
    hop_duration = settings.spectrogram_window_size * hop_fraction
    hop_size = to_frames(hop_duration, sample_rate)
    dft_size = tfa_utils.get_dft_size(window_size)

    # Frequency slicing.
    to_bin_num = tfa_utils.get_dft_bin_num
    freq_start_index = to_bin_num(
        settings.spectrogram_start_freq, sample_rate, dft_size)
    freq_end_index = 1 + to_bin_num(
        settings.spectrogram_end_freq, sample_rate, dft_size)

    return (
        time_start_index,
        time_end_index,
        window_size,
        hop_size,
        dft_size,
        freq_start_index,
        freq_end_index,
    )
Пример #6
0
    def _create_signal_processor(self):
        """
        Assemble the signal processor chain for this detector.

        The chain consists, in order, of a spectrograph, a frequency
        integrator, a power filter, and a divider. Each stage is
        configured from `self.settings`, and each stage after the first
        is given the output sample rate of the stage before it.

        Returns the `_SignalProcessorChain` wrapping the four processors.
        """

        settings = self.settings
        input_rate = self._input_sample_rate

        # Spectrograph. The hop size setting is a percentage of the
        # window size setting, so it is scaled before conversion.
        window_size = _seconds_to_samples(settings.window_size, input_rate)
        hop_duration = settings.window_size * settings.hop_size / 100
        hop_size = _seconds_to_samples(hop_duration, input_rate)
        dft_size = tfa_utils.get_dft_size(window_size)
        spectrograph = _Spectrograph(
            'Spectrograph', settings.window_type, window_size, hop_size,
            dft_size, input_rate)

        # Frequency integrator over the configured frequency range.
        bin_size = spectrograph.bin_size
        start_bin = _get_start_bin_num(settings.start_frequency, bin_size)
        end_bin = _get_end_bin_num(settings.end_frequency, bin_size)
        integrator = _FrequencyIntegrator(
            'Frequency Integrator', start_bin, end_bin,
            spectrograph.output_sample_rate)

        # Power filter, running at the integrator's output rate.
        power_filter = self._create_power_filter(
            integrator.output_sample_rate)

        # Divider, with its delay expressed in power filter output
        # samples.
        filtered_rate = power_filter.output_sample_rate
        delay = _seconds_to_samples(settings.delay, filtered_rate)
        divider = _Divider('Divider', delay, filtered_rate)

        return _SignalProcessorChain(
            'Detector',
            [spectrograph, integrator, power_filter, divider],
            self._input_sample_rate,
            self._debugging_listener)
Пример #7
0
def _get_low_level_preprocessing_settings(mode, settings):
    """
    Convert high-level preprocessing settings to index and size values.

    Durations in `settings` are converted with
    `signal_utils.seconds_to_frames` at the waveform sample rate, and
    frequencies are converted with `tfa_utils.get_dft_bin_num`.

    Returns the tuple `(time_start_index, time_end_index, window_size,
    hop_size, dft_size, freq_start_index, freq_end_index)`. The time
    start index is zero when `mode` is `DATASET_MODE_INFERENCE`. The
    frequency end index is one more than the bin number of the end
    frequency.
    """

    sample_rate = settings.waveform_sample_rate
    to_frames = signal_utils.seconds_to_frames

    # Time slicing. In inference mode the slice starts at index zero
    # and the start time setting is not consulted.
    time_start_index = (
        0 if mode == DATASET_MODE_INFERENCE
        else to_frames(settings.waveform_start_time, sample_rate))
    time_end_index = time_start_index + to_frames(
        settings.waveform_duration, sample_rate)

    # Spectrogram. The hop size setting is a percentage of the
    # window size setting.
    window_size = to_frames(settings.spectrogram_window_size, sample_rate)
    hop_fraction = settings.spectrogram_hop_size / 100
    hop_duration = settings.spectrogram_window_size * hop_fraction
    hop_size = to_frames(hop_duration, sample_rate)
    dft_size = tfa_utils.get_dft_size(window_size)

    # Frequency slicing.
    to_bin_num = tfa_utils.get_dft_bin_num
    freq_start_index = to_bin_num(
        settings.spectrogram_start_freq, sample_rate, dft_size)
    freq_end_index = 1 + to_bin_num(
        settings.spectrogram_end_freq, sample_rate, dft_size)

    return (
        time_start_index,
        time_end_index,
        window_size,
        hop_size,
        dft_size,
        freq_start_index,
        freq_end_index,
    )
Пример #8
0
def plot_spectrogram(samples, sample_rate, title, pdf_file):
    """
    Plot a log-scaled spectrogram of `samples` and save it to `pdf_file`.

    The spectrogram uses a 5 ms periodic Hann window, a hop of 20
    percent of the window duration, and a DFT twice the size that
    `tfa_utils.get_dft_size` returns for the window size.

    Parameters:
        samples: the samples to analyze, passed unchanged to
            `tfa_utils.compute_spectrogram`.
        sample_rate: the sample rate of `samples`, in hertz.
        title: the title to display on the plot.
        pdf_file: an object whose `savefig()` method saves the current
            figure (presumably a matplotlib `PdfPages` -- confirm
            against callers).
    """

    window_size_sec = .005
    hop_size_percent = 20

    window_size = int(round(window_size_sec * sample_rate))

    # `scipy.signal.hanning` is a deprecated alias that recent SciPy
    # releases no longer provide. `scipy.signal.windows.hann` computes
    # the same window.
    window = signal.windows.hann(window_size, sym=False)

    hop_size = \
        int(round(window_size_sec * hop_size_percent / 100 * sample_rate))

    dft_size = 2 * tfa_utils.get_dft_size(window_size)

    gram = tfa_utils.compute_spectrogram(samples, window, hop_size, dft_size)

    gram = tfa_utils.linear_to_log(gram)

    # plot_histogram(gram)

    # Spectrum times, each offset by half a window duration.
    hop_size_sec = window_size_sec * hop_size_percent / 100
    times = np.arange(len(gram)) * hop_size_sec + window_size_sec / 2

    # `dft_size` is even (it is twice another size), so integer
    # division is exact and keeps the bin count an `int`.
    num_bins = dft_size // 2 + 1
    bin_size = sample_rate / dft_size
    freqs = np.arange(num_bins) * bin_size

    # Transpose so that the first axis of the image is frequency.
    x = gram.transpose()

    plt.figure(figsize=(12, 6))

    # Extend the time axis by half a hop beyond the first and last
    # spectrum times.
    start_time = times[0] - hop_size_sec / 2
    end_time = times[-1] + hop_size_sec / 2
    start_freq = freqs[0]
    end_freq = freqs[-1]
    extent = (start_time, end_time, start_freq, end_freq)

    # `vmin` and `vmax` were chosen by looking at histogram of spectrogram
    # values plotted by `plot_histogram` function.
    plt.imshow(x,
               cmap='gray_r',
               vmin=-25,
               vmax=125,
               origin='lower',
               extent=extent,
               aspect='auto')

    plt.title(title)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    # plt.ylim(0, 11000)

    pdf_file.savefig()

    plt.close()
def plot_spectrogram(samples, sample_rate, title, pdf_file):
    """
    Plot a log-scaled spectrogram of `samples` and save it to `pdf_file`.

    The spectrogram uses a 5 ms periodic Hann window, a hop of 20
    percent of the window duration, and a DFT twice the size that
    `tfa_utils.get_dft_size` returns for the window size.

    Parameters:
        samples: the samples to analyze, passed unchanged to
            `tfa_utils.compute_spectrogram`.
        sample_rate: the sample rate of `samples`, in hertz.
        title: the title to display on the plot.
        pdf_file: an object whose `savefig()` method saves the current
            figure (presumably a matplotlib `PdfPages` -- confirm
            against callers).
    """

    window_size_sec = .005
    hop_size_percent = 20

    window_size = int(round(window_size_sec * sample_rate))

    # `scipy.signal.hanning` is a deprecated alias that recent SciPy
    # releases no longer provide. `scipy.signal.windows.hann` computes
    # the same window.
    window = signal.windows.hann(window_size, sym=False)

    hop_size = \
        int(round(window_size_sec * hop_size_percent / 100 * sample_rate))

    dft_size = 2 * tfa_utils.get_dft_size(window_size)

    gram = tfa_utils.compute_spectrogram(samples, window, hop_size, dft_size)

    gram = tfa_utils.linear_to_log(gram)

    # plot_histogram(gram)

    # Spectrum times, each offset by half a window duration.
    hop_size_sec = window_size_sec * hop_size_percent / 100
    times = np.arange(len(gram)) * hop_size_sec + window_size_sec / 2

    # `dft_size` is even (it is twice another size), so integer
    # division is exact and keeps the bin count an `int`.
    num_bins = dft_size // 2 + 1
    bin_size = sample_rate / dft_size
    freqs = np.arange(num_bins) * bin_size

    # Transpose so that the first axis of the image is frequency.
    x = gram.transpose()

    plt.figure(figsize=(12, 6))

    # Extend the time axis by half a hop beyond the first and last
    # spectrum times.
    start_time = times[0] - hop_size_sec / 2
    end_time = times[-1] + hop_size_sec / 2
    start_freq = freqs[0]
    end_freq = freqs[-1]
    extent = (start_time, end_time, start_freq, end_freq)

    # `vmin` and `vmax` were chosen by looking at histogram of spectrogram
    # values plotted by `plot_histogram` function.
    plt.imshow(
        x, cmap='gray_r', vmin=-25, vmax=125, origin='lower', extent=extent,
        aspect='auto')

    plt.title(title)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    # plt.ylim(0, 11000)

    pdf_file.savefig()

    plt.close()
    def test_get_dft_size(self):
        """
        Check `tfa_utils.get_dft_size` for window sizes 1 through 9.

        Each expected value below is the smallest power of two that is
        not less than the corresponding window size.
        """

        # Expected DFT sizes for window sizes 1, 2, ..., 9, in order.
        expected_sizes = (1, 2, 4, 4, 8, 8, 8, 8, 16)

        for window_size, expected in enumerate(expected_sizes, start=1):
            self.assertEqual(tfa_utils.get_dft_size(window_size), expected)
Пример #11
0
def _get_low_level_spectrogram_settings(settings):
    """
    Convert high-level spectrogram settings to size and index values.

    Durations in `settings` are converted with
    `signal_utils.seconds_to_frames` at the waveform sample rate, and
    frequencies are converted with `tfa_utils.get_dft_bin_num`.

    Returns the tuple `(window_size, hop_size, dft_size,
    freq_start_index, freq_end_index)`. The frequency end index is
    one more than the bin number of the end frequency.
    """

    sample_rate = settings.waveform_sample_rate
    to_frames = signal_utils.seconds_to_frames

    # Spectrogram. The hop size setting is a percentage of the
    # window size setting.
    window_size = to_frames(settings.spectrogram_window_size, sample_rate)
    hop_fraction = settings.spectrogram_hop_size / 100
    hop_duration = settings.spectrogram_window_size * hop_fraction
    hop_size = to_frames(hop_duration, sample_rate)
    dft_size = tfa_utils.get_dft_size(window_size)

    # Frequency slicing.
    to_bin_num = tfa_utils.get_dft_bin_num
    freq_start_index = to_bin_num(
        settings.spectrogram_start_freq, sample_rate, dft_size)
    freq_end_index = 1 + to_bin_num(
        settings.spectrogram_end_freq, sample_rate, dft_size)

    return (
        window_size,
        hop_size,
        dft_size,
        freq_start_index,
        freq_end_index,
    )
Пример #12
0
def _test_stft_new():
    """
    Print the average power in one `tf.signal.stft` bin over many trials.

    For each combination of sample rate and window duration, 1000
    waveforms obtained from `_create_white_noise` are analyzed with
    the frame length and frame step both equal to the window size,
    a DFT four times the size returned by `tfa_utils.get_dft_size`,
    and `None` passed as the window function. The squared magnitude
    of bin `bin_num` of the first frame is averaged over the trials,
    and the sample rate, window duration, window size, DFT size, and
    average are printed.
    """

    bin_num = 1
    trial_count = 1000

    # NOTE(review): 41000 may be a typo for 44100 -- confirm.
    for sample_rate in (22050, 24000, 32000, 41000, 48000):

        for window_dur in (.005, .010, .015):

            # These depend only on the sample rate and the window
            # duration, so they are computed once rather than once
            # per trial. The `print` call below uses them too.
            window_size = int(round(window_dur * sample_rate))
            dft_size = tfa_utils.get_dft_size(window_size) * 4

            # window_fn = tf.signal.hann_window
            window_fn = None

            bin_value_sum = 0

            for _ in range(trial_count):

                # waveform = _create_sinusoid(window_size, sample_rate)
                waveform = _create_white_noise(window_size)

                # The STFT input gets a leading axis of size one.
                waveforms = tf.expand_dims(waveform, 0)

                stft = tf.signal.stft(
                    waveforms, window_size, window_size, dft_size, window_fn)

                gram = tf.abs(stft) ** 2

                bin_value_sum += gram[0, 0, bin_num]

            bin_value_avg = bin_value_sum / trial_count
            print(
                sample_rate, window_dur, window_size, dft_size,
                bin_value_avg.numpy())
Пример #13
0
    def __init__(self, settings):
        """
        Initialize this object from `settings`.

        Durations in `settings` (waveform start time and duration,
        spectrogram window size and hop size) are converted with
        `signal_utils.seconds_to_frames` at the waveform sample rate.
        From them this method derives the waveform trimming indices,
        the spectrogram settings, the spectrogram shape, and the
        frequency trimming indices, and stores each on `self`.
        """

        self._settings = settings

        to_frames = signal_utils.seconds_to_frames
        sample_rate = settings.waveform_sample_rate

        # Waveform trimming start and end indices.
        start_index = to_frames(settings.waveform_start_time, sample_rate)
        self._start_time_index = start_index
        waveform_length = to_frames(settings.waveform_duration, sample_rate)
        self._end_time_index = start_index + waveform_length

        # Spectrogram settings.
        window_size = to_frames(
            settings.spectrogram_window_size, sample_rate)
        hop_size = to_frames(settings.spectrogram_hop_size, sample_rate)
        dft_size = tfa_utils.get_dft_size(window_size)
        window = data_windows.create_window('Hann', window_size)
        self._spectrogram_settings = Settings(
            window=window,
            hop_size=hop_size,
            dft_size=dft_size,
            reference_power=1)

        # Spectrogram shape: one row per spectrum and one column per
        # bin. The augmented shape has a leading axis of size one.
        num_spectra = tfa_utils.get_num_analysis_records(
            waveform_length, window_size, hop_size)
        shape = (num_spectra, dft_size // 2 + 1)
        self._spectrogram_shape = shape
        self._augmented_spectrogram_shape = (1, *shape)

        # Spectrogram trimming start and end indices. The end index is
        # one more than the bin number of the end frequency.
        self._start_freq_index = _freq_to_dft_bin_num(
            settings.spectrogram_start_freq, sample_rate, dft_size)
        end_bin_num = _freq_to_dft_bin_num(
            settings.spectrogram_end_freq, sample_rate, dft_size)
        self._end_freq_index = end_bin_num + 1
Пример #14
0
    def __init__(self, settings):
        """
        Initialize this object from `settings`.

        Durations in `settings` (waveform start time and duration,
        spectrogram window size and hop size) are converted with
        `signal_utils.seconds_to_frames` at the waveform sample rate.
        From them this method derives the waveform trimming indices,
        the spectrogram settings, the spectrogram shape, and the
        frequency trimming indices, and stores each on `self`.
        """

        self._settings = settings

        to_frames = signal_utils.seconds_to_frames
        sample_rate = settings.waveform_sample_rate

        # Waveform trimming start and end indices.
        start_index = to_frames(settings.waveform_start_time, sample_rate)
        self._start_time_index = start_index
        waveform_length = to_frames(settings.waveform_duration, sample_rate)
        self._end_time_index = start_index + waveform_length

        # Spectrogram settings.
        window_size = to_frames(
            settings.spectrogram_window_size, sample_rate)
        hop_size = to_frames(settings.spectrogram_hop_size, sample_rate)
        dft_size = tfa_utils.get_dft_size(window_size)
        window = data_windows.create_window('Hann', window_size)
        self._spectrogram_settings = Settings(
            window=window,
            hop_size=hop_size,
            dft_size=dft_size,
            reference_power=1)

        # Spectrogram shape: one row per spectrum and one column per
        # bin. The augmented shape has a leading axis of size one.
        num_spectra = tfa_utils.get_num_analysis_records(
            waveform_length, window_size, hop_size)
        shape = (num_spectra, dft_size // 2 + 1)
        self._spectrogram_shape = shape
        self._augmented_spectrogram_shape = (1, *shape)

        # Spectrogram trimming start and end indices. The end index is
        # one more than the bin number of the end frequency.
        self._start_freq_index = _freq_to_dft_bin_num(
            settings.spectrogram_start_freq, sample_rate, dft_size)
        end_bin_num = _freq_to_dft_bin_num(
            settings.spectrogram_end_freq, sample_rate, dft_size)
        self._end_freq_index = end_bin_num + 1