def main():
    """Time spectrogram computation over the audio file at `FILE_PATH`.

    The file is read in overlapping chunks, a spectrogram is computed
    for each chunk, and the elapsed wall-clock time is reported relative
    to the duration of the audio. Only the spectrogram is computed here;
    no in-band power computation is performed.
    """

    began = time.time()

    # Convert the module-level analysis settings to sample counts.
    window_size = int(round(WINDOW_SIZE * SAMPLE_RATE))
    hop_size = int(round(HOP_SIZE * SAMPLE_RATE))
    dft_size = tfa_utils.get_dft_size(window_size)

    for label, value in (
            ('window size', window_size),
            ('hop size', hop_size),
            ('DFT size', dft_size)):
        print(label, value)

    # A full read holds `spectra_per_read` windows spaced `hop_size`
    # samples apart.
    spectra_per_read = int(round(APPROXIMATE_READ_SIZE / hop_size))
    full_read_size = (spectra_per_read - 1) * hop_size + window_size

    window = HannWindow(window_size).samples

    reader = WaveAudioFileReader(str(FILE_PATH), mono_1d=True)
    total_length = reader.length

    position = 0
    remaining = total_length

    # Keep going for as long as at least one whole window is left.
    while remaining >= window_size:

        chunk = reader.read(position, min(full_read_size, remaining))

        gram = tfa_utils.compute_spectrogram(
            chunk, window, hop_size, dft_size)

        # Advance by one hop per spectrum computed, so that the next
        # read starts where the next uncomputed spectrum begins.
        position += len(gram) * hop_size
        remaining = total_length - position

    finished = time.time()

    elapsed = finished - began
    duration = reader.length / reader.sample_rate
    rate = duration / elapsed

    template = (
        'Processed {:.1f} seconds of audio in {:.1f} seconds, {:.1f} '
        'times faster than real time.')
    print(template.format(duration, elapsed, rate))
def _create_signal_processor(self):
    """Build the signal processor chain described by `self.settings`.

    The chain consists, in order, of a spectrograph, a frequency
    integrator, a power filter, and a divider. The spectrograph is
    configured with the input sample rate, and each later stage with
    the output sample rate of the stage before it.

    Returns:
        a `_SignalProcessorChain` named 'Detector' that wraps the four
        stages and is given `self._input_sample_rate` and
        `self._debugging_listener`.
    """

    settings = self.settings
    input_rate = self._input_sample_rate

    # Spectrograph. The hop size setting is applied as a percentage
    # of the window duration.
    window_size = _seconds_to_samples(settings.window_size, input_rate)
    hop_duration = settings.window_size * settings.hop_size / 100
    hop_size = _seconds_to_samples(hop_duration, input_rate)
    dft_size = tfa_utils.get_dft_size(window_size)
    spectrograph = _Spectrograph(
        'Spectrograph', settings.window_type, window_size, hop_size,
        dft_size, input_rate)

    # Frequency integrator, covering the bins between the start and
    # end frequency settings.
    bin_size = spectrograph.bin_size
    first_bin = _get_start_bin_num(settings.start_frequency, bin_size)
    last_bin = _get_end_bin_num(settings.end_frequency, bin_size)
    frequency_integrator = _FrequencyIntegrator(
        'Frequency Integrator', first_bin, last_bin,
        spectrograph.output_sample_rate)

    # Power filter.
    power_filter = self._create_power_filter(
        frequency_integrator.output_sample_rate)

    # Divider. Its delay setting is converted to samples at the
    # power filter's output rate.
    filtered_rate = power_filter.output_sample_rate
    delay = _seconds_to_samples(settings.delay, filtered_rate)
    divider = _Divider('Divider', delay, filtered_rate)

    stages = [spectrograph, frequency_integrator, power_filter, divider]

    return _SignalProcessorChain(
        'Detector', stages, self._input_sample_rate,
        self._debugging_listener)
def main():
    """Time spectrogram and in-band power computation on `FILE_PATH`.

    The audio file is processed in overlapping chunks. For each chunk a
    spectrogram is computed and passed to `compute_inband_powers`. When
    the whole file has been processed, the elapsed wall-clock time is
    printed along with how it compares to the audio duration.
    """

    t0 = time.time()

    # Analysis parameters, in samples.
    window_size = int(round(WINDOW_SIZE * SAMPLE_RATE))
    hop_size = int(round(HOP_SIZE * SAMPLE_RATE))
    dft_size = tfa_utils.get_dft_size(window_size)

    print('window size', window_size)
    print('hop size', hop_size)
    print('DFT size', dft_size)

    # Size of a read that holds `spectra_per_read` windows spaced
    # `hop_size` samples apart.
    spectra_per_read = int(round(APPROXIMATE_READ_SIZE / hop_size))
    usual_read_size = (spectra_per_read - 1) * hop_size + window_size

    window = HannWindow(window_size).samples

    reader = WaveAudioFileReader(str(FILE_PATH), mono_1d=True)
    length = reader.length

    offset = 0

    while True:

        available = length - offset

        # Stop when less than one whole window remains.
        if available < window_size:
            break

        samples = reader.read(offset, min(usual_read_size, available))

        gram = tfa_utils.compute_spectrogram(
            samples, window, hop_size, dft_size)

        inband_powers = compute_inband_powers(gram)

        # Advance one hop for each element of the in-band powers.
        offset += len(inband_powers) * hop_size

    t1 = time.time()

    elapsed = t1 - t0
    duration = reader.length / reader.sample_rate
    rate = duration / elapsed

    message = (
        'Processed {:.1f} seconds of audio in {:.1f} seconds, {:.1f} '
        'times faster than real time.').format(duration, elapsed, rate)
    print(message)
def _test_stft():
    """Print normalized, decibel-scaled STFT powers for several sizes.

    For each window size a waveform from `_create_sinusoid` is analyzed
    with `tf.signal.stft`, using a frame step equal to the window size
    and no window function. The squared STFT magnitudes are scaled by
    `1 / (window_size / 2) ** 2`, converted to
    `100 + 10 * log10(power + epsilon)`, and printed.
    """

    sample_rate = 24000
    epsilon = 1e-10

    # Multiplying a natural logarithm by this factor yields ten times
    # the base 10 logarithm.
    decibel_scale_factor = 10 / math.log(10)

    window_sizes = (8, 12, 16, 20, 24, 28, 32, 48, 64)

    for window_size in window_sizes:

        sinusoid = _create_sinusoid(window_size, sample_rate)
        batch = tf.expand_dims(sinusoid, 0)

        stft = tf.signal.stft(
            batch, window_size, window_size,
            fft_length=tfa_utils.get_dft_size(window_size),
            window_fn=None)

        powers = tf.abs(stft) ** 2

        # Normalize by the square of half the window size.
        powers = powers * (1 / (window_size / 2) ** 2)

        levels = 100 + decibel_scale_factor * tf.math.log(powers + epsilon)

        print(window_size, levels)
def _get_low_level_preprocessing_settings(mode, settings):
    """Convert preprocessing settings to sample and DFT bin indices.

    Args:
        mode: the dataset mode. When it equals `DATASET_MODE_INFERENCE`
            the time slice starts at index zero instead of at
            `settings.waveform_start_time`.
        settings: object with `waveform_*` and `spectrogram_*`
            attributes.

    Returns:
        the tuple `(time_start_index, time_end_index, window_size,
        hop_size, dft_size, freq_start_index, freq_end_index)`.
    """

    sample_rate = settings.waveform_sample_rate
    to_frames = signal_utils.seconds_to_frames

    # Time slicing.
    time_start_index = (
        0 if mode == DATASET_MODE_INFERENCE
        else to_frames(settings.waveform_start_time, sample_rate))
    time_end_index = \
        time_start_index + to_frames(settings.waveform_duration, sample_rate)

    # Spectrogram. The hop size setting is applied as a percentage of
    # the window duration.
    window_size = to_frames(settings.spectrogram_window_size, sample_rate)
    hop_fraction = settings.spectrogram_hop_size / 100
    hop_size = to_frames(
        settings.spectrogram_window_size * hop_fraction, sample_rate)
    dft_size = tfa_utils.get_dft_size(window_size)

    # Frequency slicing. The end index is one more than the bin number
    # of the end frequency.
    to_bin_num = tfa_utils.get_dft_bin_num
    freq_start_index = to_bin_num(
        settings.spectrogram_start_freq, sample_rate, dft_size)
    freq_end_index = 1 + to_bin_num(
        settings.spectrogram_end_freq, sample_rate, dft_size)

    return (
        time_start_index,
        time_end_index,
        window_size,
        hop_size,
        dft_size,
        freq_start_index,
        freq_end_index,
    )
def _create_signal_processor(self):
    """Create the 'Detector' signal processor chain for this object.

    Four processors are created from `self.settings` and chained in
    this order: spectrograph, frequency integrator, power filter,
    divider. The sample rate handed to each processor after the first
    is the `output_sample_rate` of its predecessor.

    Returns:
        the `_SignalProcessorChain` holding the four processors.
    """

    cfg = self.settings
    sample_rate = self._input_sample_rate

    # The hop size setting is applied as a percentage of the window
    # duration.
    window_size = _seconds_to_samples(cfg.window_size, sample_rate)
    hop_size = _seconds_to_samples(
        cfg.window_size * cfg.hop_size / 100, sample_rate)
    dft_size = tfa_utils.get_dft_size(window_size)

    spectrograph = _Spectrograph(
        'Spectrograph', cfg.window_type, window_size, hop_size, dft_size,
        sample_rate)

    # Translate the frequency settings to spectrograph bin numbers.
    bin_size = spectrograph.bin_size
    start_bin_num = _get_start_bin_num(cfg.start_frequency, bin_size)
    end_bin_num = _get_end_bin_num(cfg.end_frequency, bin_size)

    frequency_integrator = _FrequencyIntegrator(
        'Frequency Integrator', start_bin_num, end_bin_num,
        spectrograph.output_sample_rate)

    sample_rate = frequency_integrator.output_sample_rate
    power_filter = self._create_power_filter(sample_rate)

    # The divider delay is expressed in samples at the power filter's
    # output rate.
    sample_rate = power_filter.output_sample_rate
    divider = _Divider(
        'Divider', _seconds_to_samples(cfg.delay, sample_rate), sample_rate)

    chain = _SignalProcessorChain(
        'Detector',
        [spectrograph, frequency_integrator, power_filter, divider],
        self._input_sample_rate,
        self._debugging_listener)

    return chain
def _get_low_level_preprocessing_settings(mode, settings):
    """Derive index-domain preprocessing parameters from `settings`.

    Times are converted with `signal_utils.seconds_to_frames` and
    frequencies with `tfa_utils.get_dft_bin_num`.

    Args:
        mode: the dataset mode. In `DATASET_MODE_INFERENCE` the time
            start index is zero. Otherwise it is derived from
            `settings.waveform_start_time`.
        settings: the preprocessing settings to convert.

    Returns:
        a 7-tuple of the time start and end indices, the spectrogram
        window, hop, and DFT sizes, and the frequency start and end
        indices, in that order.
    """

    fs = settings.waveform_sample_rate
    seconds_to_frames = signal_utils.seconds_to_frames

    # Time slicing.
    if mode == DATASET_MODE_INFERENCE:
        start = 0
    else:
        start = seconds_to_frames(settings.waveform_start_time, fs)
    duration = seconds_to_frames(settings.waveform_duration, fs)
    time_bounds = (start, start + duration)

    # Spectrogram. The hop size setting is applied as a percentage of
    # the window duration.
    window_size = seconds_to_frames(settings.spectrogram_window_size, fs)
    fraction = settings.spectrogram_hop_size / 100
    hop_duration = settings.spectrogram_window_size * fraction
    hop_size = seconds_to_frames(hop_duration, fs)
    dft_size = tfa_utils.get_dft_size(window_size)
    spectrogram_sizes = (window_size, hop_size, dft_size)

    # Frequency slicing. One is added to the bin number of the end
    # frequency to form the end index.
    get_bin_num = tfa_utils.get_dft_bin_num
    start_bin = get_bin_num(settings.spectrogram_start_freq, fs, dft_size)
    end_bin = get_bin_num(settings.spectrogram_end_freq, fs, dft_size)
    freq_bounds = (start_bin, end_bin + 1)

    return time_bounds + spectrogram_sizes + freq_bounds
def plot_spectrogram(samples, sample_rate, title, pdf_file):
    """Plot a log-scaled spectrogram of `samples` and save it to `pdf_file`.

    The spectrogram is computed with a 5 ms `signal.hanning` window, a
    hop of 20 percent of the window duration, and a DFT twice the size
    that `tfa_utils.get_dft_size` returns for the window size.

    Args:
        samples: the samples to analyze, passed unchanged to
            `tfa_utils.compute_spectrogram`.
        sample_rate: the sample rate of `samples`, in hertz.
        title: the title of the plot.
        pdf_file: object whose `savefig` method is called to save the
            current figure.
    """

    window_size_sec = .005
    hop_size_percent = 20
    hop_size_sec = window_size_sec * hop_size_percent / 100

    window_size = int(round(window_size_sec * sample_rate))
    hop_size = int(round(hop_size_sec * sample_rate))
    dft_size = 2 * tfa_utils.get_dft_size(window_size)

    # NOTE(review): if `signal` is `scipy.signal`, `hanning` is a legacy
    # alias that recent SciPy releases no longer provide. Confirm, and
    # consider `signal.windows.hann` instead.
    window = signal.hanning(window_size, sym=False)

    gram = tfa_utils.compute_spectrogram(samples, window, hop_size, dft_size)
    gram = tfa_utils.linear_to_log(gram)

    # plot_histogram(gram)

    # Spectrum times are at window centers, using the nominal
    # (unrounded) hop duration.
    times = np.arange(len(gram)) * hop_size_sec + window_size_sec / 2

    # A bin count is an integer, so use integer division here.
    num_bins = dft_size // 2 + 1
    bin_size = sample_rate / dft_size
    freqs = np.arange(num_bins) * bin_size

    # Transpose so the first axis of the image is frequency.
    x = gram.transpose()

    plt.figure(figsize=(12, 6))

    # Extend the time extent by half a hop on each side of the first
    # and last spectrum times.
    start_time = times[0] - hop_size_sec / 2
    end_time = times[-1] + hop_size_sec / 2
    extent = (start_time, end_time, freqs[0], freqs[-1])

    # `vmin` and `vmax` were chosen by looking at histogram of spectrogram
    # values plotted by `plot_histogram` function.
    plt.imshow(
        x, cmap='gray_r', vmin=-25, vmax=125, origin='lower',
        extent=extent, aspect='auto')

    plt.title(title)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    # plt.ylim(0, 11000)

    pdf_file.savefig()

    plt.close()
def plot_spectrogram(samples, sample_rate, title, pdf_file):
    """Add a spectrogram plot of `samples` to `pdf_file`.

    A log-scaled spectrogram is computed with `tfa_utils`, shown as a
    grayscale image with time on the horizontal axis and frequency on
    the vertical axis, and saved by calling `pdf_file.savefig`. The
    figure is closed afterwards.

    Args:
        samples: the samples to analyze, passed unchanged to
            `tfa_utils.compute_spectrogram`.
        sample_rate: the sample rate of `samples`, in hertz.
        title: the title of the plot.
        pdf_file: object providing the `savefig` method used to save
            the current figure.
    """

    # Analysis parameters: a 5 ms window and a hop of 20 percent of the
    # window duration.
    window_size_sec = .005
    hop_size_percent = 20

    window_size = int(round(window_size_sec * sample_rate))

    # NOTE(review): if `signal` is `scipy.signal`, `hanning` is a legacy
    # alias that recent SciPy releases no longer provide. Confirm, and
    # consider `signal.windows.hann` instead.
    window = signal.hanning(window_size, sym=False)

    hop_size = int(round(
        window_size_sec * hop_size_percent / 100 * sample_rate))

    # The DFT is twice the size `tfa_utils.get_dft_size` returns.
    dft_size = 2 * tfa_utils.get_dft_size(window_size)

    linear_gram = tfa_utils.compute_spectrogram(
        samples, window, hop_size, dft_size)
    gram = tfa_utils.linear_to_log(linear_gram)

    # plot_histogram(gram)

    # Time axis: one value per spectrum, at the center of its window,
    # using the nominal (unrounded) hop duration.
    hop_size_sec = window_size_sec * hop_size_percent / 100
    half_hop_sec = hop_size_sec / 2
    times = np.arange(len(gram)) * hop_size_sec + window_size_sec / 2

    # Frequency axis. The bin count is computed with integer division
    # since it is a count.
    num_bins = dft_size // 2 + 1
    bin_size = sample_rate / dft_size
    freqs = np.arange(num_bins) * bin_size

    plt.figure(figsize=(12, 6))

    extent = (
        times[0] - half_hop_sec,
        times[-1] + half_hop_sec,
        freqs[0],
        freqs[-1])

    # `vmin` and `vmax` were chosen by looking at histogram of spectrogram
    # values plotted by `plot_histogram` function.
    plt.imshow(
        gram.transpose(), cmap='gray_r', vmin=-25, vmax=125,
        origin='lower', extent=extent, aspect='auto')

    plt.title(title)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    # plt.ylim(0, 11000)

    pdf_file.savefig()

    plt.close()
def test_get_dft_size(self):

    # Expected results of `tfa_utils.get_dft_size` for window sizes one
    # through nine, in order. Each is the smallest power of two that is
    # not less than the window size.
    expected_sizes = (1, 2, 4, 4, 8, 8, 8, 8, 16)

    for window_size, expected in enumerate(expected_sizes, start=1):
        actual = tfa_utils.get_dft_size(window_size)
        self.assertEqual(actual, expected)
def _get_low_level_spectrogram_settings(settings):
    """Convert spectrogram settings to sample counts and DFT bin indices.

    Args:
        settings: object with `waveform_sample_rate` and
            `spectrogram_*` attributes.

    Returns:
        the tuple `(window_size, hop_size, dft_size, freq_start_index,
        freq_end_index)`.
    """

    sample_rate = settings.waveform_sample_rate
    to_frames = signal_utils.seconds_to_frames

    # Spectrogram. The hop size setting is applied as a percentage of
    # the window duration.
    window_size = to_frames(settings.spectrogram_window_size, sample_rate)
    hop_fraction = settings.spectrogram_hop_size / 100
    hop_size = to_frames(
        settings.spectrogram_window_size * hop_fraction, sample_rate)
    dft_size = tfa_utils.get_dft_size(window_size)

    # Frequency slicing. The end index is one more than the bin number
    # of the end frequency.
    to_bin_num = tfa_utils.get_dft_bin_num
    freq_start_index = to_bin_num(
        settings.spectrogram_start_freq, sample_rate, dft_size)
    freq_end_index = 1 + to_bin_num(
        settings.spectrogram_end_freq, sample_rate, dft_size)

    return (
        window_size,
        hop_size,
        dft_size,
        freq_start_index,
        freq_end_index,
    )
def _test_stft_new():
    """Print the trial-averaged power of one STFT bin.

    For each combination of sample rate and window duration, 1000
    waveforms from `_create_white_noise` are analyzed with
    `tf.signal.stft`, using a frame step equal to the window size, a
    DFT four times the size returned by `tfa_utils.get_dft_size`, and no
    window function. The squared magnitude of bin `bin_num` of the
    first frame is averaged over the trials and printed along with the
    analysis parameters.
    """

    bin_num = 1
    trial_count = 1000

    # Alternative window function: tf.signal.hann_window
    window_fn = None

    for sample_rate in (22050, 24000, 32000, 41000, 48000):

        for window_dur in (.005, .010, .015):

            # These depend only on the sample rate and window duration,
            # so compute them once rather than on every trial. This
            # also keeps them bound for the `print` below independent
            # of the trial loop.
            window_size = int(round(window_dur * sample_rate))
            dft_size = tfa_utils.get_dft_size(window_size) * 4

            bin_value_sum = 0

            for _ in range(trial_count):

                # Alternative input:
                # waveform = _create_sinusoid(window_size, sample_rate)
                waveform = _create_white_noise(window_size)

                waveforms = tf.expand_dims(waveform, 0)

                stft = tf.signal.stft(
                    waveforms, window_size, window_size, dft_size,
                    window_fn)

                gram = tf.abs(stft) ** 2

                bin_value_sum += gram[0, 0, bin_num]

            bin_value_avg = bin_value_sum / trial_count

            print(
                sample_rate, window_dur, window_size, dft_size,
                bin_value_avg.numpy())
def __init__(self, settings):
    """Precompute index-domain quantities from `settings`.

    Stores the settings along with waveform trimming indices,
    spectrogram settings, the spectrogram shape, and spectrogram
    frequency trimming indices derived from them.

    Args:
        settings: object with `waveform_*` and `spectrogram_*`
            attributes.
    """

    self._settings = settings

    sample_rate = settings.waveform_sample_rate
    to_frames = signal_utils.seconds_to_frames

    # Get waveform trimming start and end indices.
    start_index = to_frames(settings.waveform_start_time, sample_rate)
    waveform_length = to_frames(settings.waveform_duration, sample_rate)
    self._start_time_index = start_index
    self._end_time_index = start_index + waveform_length

    # Get spectrogram settings. Both the window size and the hop size
    # settings are converted with `seconds_to_frames`.
    window_size = to_frames(settings.spectrogram_window_size, sample_rate)
    hop_size = to_frames(settings.spectrogram_hop_size, sample_rate)
    dft_size = tfa_utils.get_dft_size(window_size)
    window = data_windows.create_window('Hann', window_size)
    self._spectrogram_settings = Settings(
        window=window,
        hop_size=hop_size,
        dft_size=dft_size,
        reference_power=1)

    # Get spectrogram shape. The augmented shape has an extra leading
    # dimension of size one.
    num_spectra = tfa_utils.get_num_analysis_records(
        waveform_length, window_size, hop_size)
    num_bins = dft_size // 2 + 1
    shape = (num_spectra, num_bins)
    self._spectrogram_shape = shape
    self._augmented_spectrogram_shape = (1,) + shape

    # Get spectrogram trimming start and end indices. The end index is
    # one more than the bin number of the end frequency.
    self._start_freq_index = _freq_to_dft_bin_num(
        settings.spectrogram_start_freq, sample_rate, dft_size)
    end_bin_num = _freq_to_dft_bin_num(
        settings.spectrogram_end_freq, sample_rate, dft_size)
    self._end_freq_index = end_bin_num + 1
def __init__(self, settings):
    """Initialize this object from `settings`.

    The settings are stored, and the following are derived from them
    and stored as well: the waveform trimming start and end indices,
    a `Settings` object for spectrogram computation, the spectrogram
    shape with and without an extra leading dimension of size one, and
    the spectrogram frequency trimming start and end indices.

    Args:
        settings: the settings from which to initialize, with
            `waveform_*` and `spectrogram_*` attributes.
    """

    self._settings = settings

    fs = settings.waveform_sample_rate
    seconds_to_frames = signal_utils.seconds_to_frames

    # Waveform trimming start and end indices.
    self._start_time_index = seconds_to_frames(
        settings.waveform_start_time, fs)
    waveform_length = seconds_to_frames(settings.waveform_duration, fs)
    self._end_time_index = self._start_time_index + waveform_length

    # Spectrogram settings. Both the window size and the hop size
    # settings are converted with `seconds_to_frames`.
    window_size = seconds_to_frames(settings.spectrogram_window_size, fs)
    hop_size = seconds_to_frames(settings.spectrogram_hop_size, fs)
    dft_size = tfa_utils.get_dft_size(window_size)

    self._spectrogram_settings = Settings(
        window=data_windows.create_window('Hann', window_size),
        hop_size=hop_size,
        dft_size=dft_size,
        reference_power=1)

    # Spectrogram shape: number of spectra by number of DFT bins.
    num_spectra = tfa_utils.get_num_analysis_records(
        waveform_length, window_size, hop_size)
    num_bins = dft_size // 2 + 1

    self._spectrogram_shape = (num_spectra, num_bins)
    self._augmented_spectrogram_shape = (1, num_spectra, num_bins)

    # Spectrogram trimming start and end indices. One is added to the
    # bin number of the end frequency to form the end index.
    start_bin_num = _freq_to_dft_bin_num(
        settings.spectrogram_start_freq, fs, dft_size)
    end_bin_num = _freq_to_dft_bin_num(
        settings.spectrogram_end_freq, fs, dft_size)

    self._start_freq_index = start_bin_num
    self._end_freq_index = end_bin_num + 1