def __init__(self, output_path: str, mw: aqt.AnkiQt, parent: QWidget) -> None:
    """Set up a Qt audio input on the default input device.

    Requests mono, 44100 Hz, 16-bit signed little-endian PCM. If the default
    input device does not support that format, the device's nearest format
    is used instead and its parameters are printed.

    NOTE(review): the source was whitespace-collapsed; the three detail
    prints are assumed to belong to the "format changed" branch - confirm.
    """
    super().__init__(output_path)
    self.mw = mw
    self._parent = parent

    # Imported here, as in the original, so PyQt5 multimedia is only loaded
    # when this constructor actually runs.
    from PyQt5.QtMultimedia import QAudioDeviceInfo, QAudioFormat, QAudioInput

    requested = QAudioFormat()
    requested.setChannelCount(1)
    requested.setSampleRate(44100)
    requested.setSampleSize(16)
    requested.setCodec("audio/pcm")
    requested.setByteOrder(QAudioFormat.LittleEndian)
    requested.setSampleType(QAudioFormat.SignedInt)

    input_device = QAudioDeviceInfo.defaultInputDevice()
    if input_device.isFormatSupported(requested):
        chosen = requested
    else:
        # Fall back to whatever the device offers closest to the request.
        chosen = input_device.nearestFormat(requested)
        print("format changed")
        print("channels", chosen.channelCount())
        print("rate", chosen.sampleRate())
        print("size", chosen.sampleSize())

    self._format = chosen
    self._audio_input = QAudioInput(input_device, chosen, parent)
def populateTable(self):
    """Fill ``allFormatsTable`` with every format ``deviceInfo`` supports.

    Walks all combinations of codec, sample rate, channel count, sample
    type, sample size and byte order reported by ``self.deviceInfo`` and
    appends one table row for each combination the device accepts.
    """
    candidate = QAudioFormat()
    next_row = 0

    for codec in self.deviceInfo.supportedCodecs():
        candidate.setCodec(codec)
        for rate in self.deviceInfo.supportedSampleRates():
            candidate.setSampleRate(rate)
            for channel_count in self.deviceInfo.supportedChannelCounts():
                candidate.setChannelCount(channel_count)
                for sample_type in self.deviceInfo.supportedSampleTypes():
                    candidate.setSampleType(sample_type)
                    for sample_size in self.deviceInfo.supportedSampleSizes():
                        candidate.setSampleSize(sample_size)
                        for byte_order in self.deviceInfo.supportedByteOrders():
                            candidate.setByteOrder(byte_order)

                            if self.deviceInfo.isFormatSupported(candidate):
                                # Grow the table by one row, then fill its
                                # six columns left to right.
                                self.allFormatsTable.setRowCount(next_row + 1)
                                cells = (
                                    candidate.codec(),
                                    str(candidate.sampleRate()),
                                    str(candidate.channelCount()),
                                    self.sampleTypeToString(
                                        candidate.sampleType()),
                                    str(candidate.sampleSize()),
                                    self.endianToString(
                                        candidate.byteOrder()),
                                )
                                for column, text in enumerate(cells):
                                    self.setFormatValue(next_row, column, text)
                                next_row += 1
def populateTable(self):
    """List every audio format accepted by ``self.deviceInfo`` in the table.

    Each supported combination of codec / sample rate / channels / sample
    type / sample size / byte order becomes one row of ``allFormatsTable``
    (columns 0-5, in that order).
    """
    info = self.deviceInfo
    table = self.allFormatsTable
    fmt = QAudioFormat()
    filled = 0

    for codec in info.supportedCodecs():
        fmt.setCodec(codec)
        for sampleRate in info.supportedSampleRates():
            fmt.setSampleRate(sampleRate)
            for channels in info.supportedChannelCounts():
                fmt.setChannelCount(channels)
                for sampleType in info.supportedSampleTypes():
                    fmt.setSampleType(sampleType)
                    for sampleSize in info.supportedSampleSizes():
                        fmt.setSampleSize(sampleSize)
                        for endian in info.supportedByteOrders():
                            fmt.setByteOrder(endian)

                            # Skip combinations the device rejects.
                            if not info.isFormatSupported(fmt):
                                continue

                            table.setRowCount(filled + 1)
                            self.setFormatValue(filled, 0, fmt.codec())
                            self.setFormatValue(
                                filled, 1, str(fmt.sampleRate()))
                            self.setFormatValue(
                                filled, 2, str(fmt.channelCount()))
                            self.setFormatValue(
                                filled, 3,
                                self.sampleTypeToString(fmt.sampleType()))
                            self.setFormatValue(
                                filled, 4, str(fmt.sampleSize()))
                            self.setFormatValue(
                                filled, 5,
                                self.endianToString(fmt.byteOrder()))
                            filled += 1
class SignalProc:
    """ This class reads and holds the audiodata and spectrogram, to be used in the main interface.
    Inverse, denoise, and other processing algorithms are provided here.
    Also bandpass and Butterworth bandpass filters.
    Primary parameters are the width of a spectrogram window (window_width) and the shift between them (incr)
    """

    def __init__(self, window_width=256, incr=128, minFreqShow=0, maxFreqShow=0):
        """ Store the spectrogram parameters and prepare an empty audio buffer.

        maxFreqShow = 0 means fall back to Fs/2 for any file (applied in readWav).
        """
        self.window_width = window_width
        self.incr = incr
        self.minFreqShow = minFreqShow
        self.maxFreqShow = maxFreqShow
        self.data = []

        # only accepting wav files of this format
        self.audioFormat = QAudioFormat()
        self.audioFormat.setCodec("audio/pcm")
        self.audioFormat.setByteOrder(QAudioFormat.LittleEndian)
        self.audioFormat.setSampleType(QAudioFormat.SignedInt)

    def readWav(self, file, len=None, off=0, silent=False):
        """ Read a wav file into self.data and update the format/frequency attributes.

        Args the same as for wavio.read: filename, length in seconds, offset in seconds.
        (The parameter name `len` shadows the builtin inside this method; it is
        kept because it is part of the public signature.)
        If silent is False, the detected format is printed.
        """
        wavobj = wavio.read(file, len, off)
        self.data = wavobj.data

        # take only left channel
        if np.ndim(self.data) > 1:
            self.data = self.data[:, 0]
        self.audioFormat.setChannelCount(1)

        # force float type
        if self.data.dtype != 'float':
            self.data = self.data.astype('float')
        self.audioFormat.setSampleSize(wavobj.sampwidth * 8)

        # total file length in frames (samples), as reported by wavio (useful for paging)
        self.fileLength = wavobj.nframes

        self.sampleRate = wavobj.rate
        self.audioFormat.setSampleRate(self.sampleRate)

        # *Freq sets hard bounds, *Show can limit the spec display
        self.minFreq = 0
        self.maxFreq = self.sampleRate // 2
        self.minFreqShow = max(self.minFreq, self.minFreqShow)
        if self.maxFreqShow <= 0:
            # 0 is the documented "use Fs/2" request; min() would have pinned
            # the display limit to 0 and hidden everything.
            self.maxFreqShow = self.maxFreq
        else:
            self.maxFreqShow = min(self.maxFreq, self.maxFreqShow)

        if not silent:
            print("Detected format: %d channels, %d Hz, %d bit samples" % (self.audioFormat.channelCount(), self.audioFormat.sampleRate(), self.audioFormat.sampleSize()))

    def resample(self, target):
        """ Resample self.data to `target` Hz in place and update the rate-dependent attributes. """
        if len(self.data) == 0:
            print("Warning: no data set to resmample")
            return
        if target == self.sampleRate:
            print("No resampling needed")
            return

        # NOTE(review): positional (orig_sr, target_sr) arguments are rejected by
        # recent librosa releases - confirm against the pinned librosa version.
        self.data = librosa.core.audio.resample(self.data, self.sampleRate, target)

        self.sampleRate = target
        self.audioFormat.setSampleRate(target)

        self.minFreq = 0
        self.maxFreq = self.sampleRate // 2

        self.fileLength = len(self.data)

    def convertAmpltoSpec(self, x):
        """ Unit conversion, for easier use wherever spectrograms are needed:
        multiplies x by sampleRate/incr (presumably seconds -> spectrogram columns).
        """
        return x*self.sampleRate/self.incr

    def setWidth(self, window_width, incr):
        """ Set the spectrogram window width and increment.
        Called when the user modifies the spectrogram parameters.
        """
        self.window_width = window_width
        self.incr = incr

    def setData(self, audiodata, sampleRate=None):
        """ Replace the held audio data, and the sample rate if one is given. """
        self.data = audiodata
        if sampleRate is not None:
            self.sampleRate = sampleRate

    def SnNR(self, startSignal, startNoise):
        """ Compute the estimated signal-to-noise ratio in dB between two
        equally long stretches of self.data starting at the given sample indices.
        """
        # NOTE(review): self.length is not set anywhere in this class; the caller
        # must assign it beforehand or this raises AttributeError - confirm.
        pS = np.sum(self.data[startSignal:startSignal+self.length]**2)/self.length
        pN = np.sum(self.data[startNoise:startNoise+self.length]**2)/self.length
        return 10.*np.log10(pS/pN)

    def equalLoudness(self, data):
        """ Apply an equal-loudness filter followed by a Butterworth highpass to data. """
        # TODO: Assumes 16000 sampling rate, fix!
        # Basically, save a few more sets of filter coefficients...

        # Basic equal loudness curve.
        # This is for humans, NOT birds (there is a paper that claims to have some, but I can't access it:
        # https://doi.org/10.1121/1.428951)

        # The filter weights were obtained from Matlab (using yulewalk) for the standard 80 dB ISO curve
        # for a sampling rate of 16000

        # 10 coefficient Yule-Walker fit for [0,120;20,113;30,103;40,97;50,93;60,91;70,89;80,87;90,86;100,85;200,78;300,76;400,76;500,76;600,76;700,77;800,78;900,79.5;1000,80;1500,79;2000,77;2500,74;3000,71.5;3700,70;4000,70.5;5000,74;6000,79;7000,84;8000,86]
        # Or at least, EL80(:,1)./(fs/2) and m=10.^((70-EL80(:,2))/20);
        ay = np.array([1.0000,-0.6282, 0.2966,-0.3726,0.0021,-0.4203,0.2220,0.0061, 0.0675, 0.0578,0.0322])
        by = np.array([0.4492,-0.1435,-0.2278,-0.0142,0.0408,-0.1240,0.0410,0.1048,-0.0186,-0.0319,0.0054])

        # Butterworth highpass
        ab = np.array([1.0000,-1.9167,0.9201])
        bb = np.array([0.9592,-1.9184,0.9592])

        data = signal.lfilter(by, ay, data)
        data = signal.lfilter(bb, ab, data)

        return data

    def _windowFunction(self, window, window_width):
        """ Return the named window as an array of window_width samples.

        Known names: Hann, Parzen, Welch, Hamming, Blackman, BlackmanHarris, Ones.
        Any other name prints a notice and yields a Hann window.
        """
        n = np.arange(window_width)
        if window == 'Hann':
            return 0.5 * (1 - np.cos(2 * np.pi * n / (window_width - 1)))
        elif window == 'Parzen':
            # Parzen (window_width even)
            m = n - 0.5*window_width
            return np.where(np.abs(m) < 0.25*window_width, 1 - 6*(m/(0.5*window_width))**2*(1-np.abs(m)/(0.5*window_width)), 2*(1-np.abs(m)/(0.5*window_width))**3)
        elif window == 'Welch':
            return 1.0 - ((n - 0.5*(window_width-1))/(0.5*(window_width-1)))**2
        elif window == 'Hamming':
            alpha = 0.54
            beta = 1.-alpha
            return alpha - beta*np.cos(2 * np.pi * n / (window_width - 1))
        elif window == 'Blackman':
            alpha = 0.16
            a0 = 0.5*(1-alpha)
            a1 = 0.5
            a2 = 0.5*alpha
            return a0 - a1*np.cos(2 * np.pi * n / (window_width - 1)) + a2*np.cos(4 * np.pi * n / (window_width - 1))
        elif window == 'BlackmanHarris':
            a0 = 0.358375
            a1 = 0.48829
            a2 = 0.14128
            a3 = 0.01168
            return a0 - a1*np.cos(2 * np.pi * n / (window_width - 1)) + a2*np.cos(4 * np.pi * n / (window_width - 1)) - a3*np.cos(6 * np.pi * n / (window_width - 1))
        elif window == 'Ones':
            return np.ones(window_width)
        else:
            print("Unknown window, using Hann")
            return 0.5 * (1 - np.cos(2 * np.pi * n / (window_width - 1)))

    # from memory_profiler import profile
    # fp = open('memory_profiler_sp.log', 'w+')
    # @profile(stream=fp)
    def spectrogram(self, window_width=None, incr=None, window='Hann', equal_loudness=False, mean_normalise=True, onesided=True, multitaper=False, need_even=False):
        """ Compute the spectrogram from amplitude data (self.data).

        Stores the result in self.sg and returns it (rows are time windows,
        columns are frequency bins). Returns None if there is no audio data.
        Returns the magnitude of the FT, not the density -- compute
        10.*log10(sg) before plotting.
        Uses absolute value of the FT, not FT*conj(FT), 'cos it seems to give better discrimination
        Options: multitaper version, but it's slow, mean normalised, even, one-sided.
        window_width and incr default to the values held by the object.
        This version is faster than the default versions in pylab and scipy.signal
        Assumes that the values are not normalised.
        """
        if self.data is None or len(self.data) == 0:
            print("ERROR: attempted to calculate spectrogram without audiodata")
            return

        if window_width is None:
            window_width = self.window_width
        if incr is None:
            incr = self.incr

        # clean handling of very short segments:
        if len(self.data) <= window_width:
            window_width = len(self.data) - 1

        self.sg = np.copy(self.data)
        if self.sg.dtype != 'float':
            self.sg = self.sg.astype('float')

        # Build the window only after window_width has been finalised
        window = self._windowFunction(window, window_width)

        if equal_loudness:
            self.sg = self.equalLoudness(self.sg)

        if mean_normalise:
            self.sg -= self.sg.mean()

        starts = range(0, len(self.sg) - window_width, incr)
        if multitaper:
            [tapers, eigen] = dpss(window_width, 2.5, 4)
            counter = 0
            out = np.zeros((len(starts), window_width // 2))
            for start in starts:
                Sk, weights, eigen = pmtm(self.sg[start:start + window_width], v=tapers, e=eigen, show=False)
                Sk = abs(Sk)**2
                Sk = np.mean(Sk.T * weights, axis=1)
                out[counter:counter + 1, :] = Sk[window_width // 2:].T
                counter += 1
            self.sg = np.fliplr(out)
        else:
            if need_even:
                # Extra start positions of 0: they only add all-zero rows at the
                # end of ft (row 0 is rewritten with the same data).
                starts = np.hstack((starts, np.zeros((window_width - len(self.sg) % window_width), dtype=int)))

            # this mode is optimized for speed, but reportedly sometimes
            # results in crashes when lots of large files are batch processed.
            # The FFTs here could be causing this, but I'm not sure.
            # hi_mem = False should switch FFTs to go over smaller vectors
            # and possibly use less caching, at the cost of 1.5x longer CPU time.
            hi_mem = True
            if hi_mem:
                ft = np.zeros((len(starts), window_width))
                for i in starts:
                    ft[i // incr, :] = self.sg[i:i + window_width]
                ft = np.multiply(window, ft)

                if onesided:
                    self.sg = np.absolute(fft.fft(ft)[:, :window_width // 2])
                else:
                    self.sg = np.absolute(fft.fft(ft))
            else:
                # ft must be complex here: a real array would silently drop the
                # imaginary part of the FFT before the magnitude is taken.
                if onesided:
                    ft = np.zeros((len(starts), window_width//2), dtype=complex)
                    for i in starts:
                        winddata = window * self.sg[i:i + window_width]
                        ft[i // incr, :] = fft.fft(winddata)[:window_width//2]
                else:
                    ft = np.zeros((len(starts), window_width), dtype=complex)
                    for i in starts:
                        winddata = window * self.sg[i:i + window_width]
                        ft[i // incr, :] = fft.fft(winddata)
                self.sg = np.absolute(ft)

            del ft
            gc.collect()
            #sg = (ft*np.conj(ft))[:,window_width // 2:].T
        return self.sg

    def bandpassFilter(self, data=None, sampleRate=None, start=0, end=None):
        """ FIR bandpass filter
        129 taps, Hamming window, very basic.

        data and sampleRate default to the values held by the object.
        start and end are the band limits in Hz; None means 0 and Nyquist.
        Returns the filtered data (or the input unchanged if the band is the full range).
        """
        if data is None:
            data = self.data
        if sampleRate is None:
            sampleRate = self.sampleRate
        if start is None:
            # denoise() passes None when no lower limit was chosen
            start = 0
        if end is None:
            end = sampleRate/2
        start = max(start, 0)
        end = min(end, sampleRate/2)

        if start == 0 and end == sampleRate/2:
            print("No filter needed!")
            return data

        nyquist = sampleRate/2
        ntaps = 129

        if start == 0:
            # Low pass
            taps = signal.firwin(ntaps, cutoff=[end / nyquist], window=('hamming'), pass_zero=True)
        elif end == sampleRate/2:
            # High pass
            taps = signal.firwin(ntaps, cutoff=[start / nyquist], window=('hamming'), pass_zero=False)
        else:
            # Bandpass
            taps = signal.firwin(ntaps, cutoff=[start / nyquist, end / nyquist], window=('hamming'), pass_zero=False)
        #ntaps, beta = signal.kaiserord(ripple_db, width)
        #taps = signal.firwin(ntaps,cutoff = [500/nyquist,8000/nyquist], window=('kaiser', beta),pass_zero=False)
        return signal.lfilter(taps, 1.0, data)

    def ButterworthBandpass(self, data, sampleRate, low=0, high=None, band=0.005):
        """ Basic IIR bandpass filter.
        Identifies order of filter, max 10. If single-stage polynomial is unstable,
        switches to order 30, second-order filter.
        Args:
        1-2. data and sample rate (None means use the values held by the object).
        3-4. Low and high pass frequencies in Hz (None means 0 and Nyquist)
        5. difference between stopband and passband, in fraction of Nyquist.
        Filter will lose no more than 3 dB in freqs [low,high], and attenuate
        at least 40 dB outside [low-band*Fn, high+band*Fn].

        Does double-pass filtering - slower, but keeps original phase.
        """
        if data is None:
            data = self.data
        if sampleRate is None:
            sampleRate = self.sampleRate
        nyquist = sampleRate/2

        if low is None:
            # denoise() passes None when no lower limit was chosen
            low = 0
        if high is None:
            high = nyquist
        low = max(low, 0)
        high = min(high, nyquist)

        # convert freqs to fractions of Nyquist:
        lowPass = low/nyquist
        highPass = high/nyquist
        lowStop = lowPass-band
        highStop = highPass+band
        # safety checks for values near edges
        if lowStop < 0:
            lowStop = lowPass/2
        if highStop > 1:
            highStop = (1+highPass)/2

        if lowPass == 0 and highPass == 1:
            print("No filter needed!")
            return data
        elif lowPass == 0:
            # Low pass
            btype = 'lowpass'
            order, wN = signal.buttord(highPass, highStop, 3, 40)
        elif highPass == 1:
            # High pass
            btype = 'highpass'
            order, wN = signal.buttord(lowPass, lowStop, 3, 40)
        else:
            # Band pass
            btype = 'bandpass'
            order, wN = signal.buttord([lowPass, highPass], [lowStop, highStop], 3, 40)

        # cap the order that buttord suggested
        if order > 10:
            order = 10
        b, a = signal.butter(order, wN, btype=btype)

        # check if filter is stable
        filterUnstable = np.any(np.abs(np.roots(a)) > 1)
        if filterUnstable:
            # redesign to SOS and filter.
            # uses order=30 because why not
            print("single-stage filter unstable, switching to SOS filtering")
            sos = signal.butter(30, wN, btype=btype, output='sos')
            # do the actual filtering
            data = signal.sosfiltfilt(sos, data)
        else:
            # do the actual filtering
            data = signal.filtfilt(b, a, data)

        return data

    def FastButterworthBandpass(self, data, low=0, high=None):
        """ Basic IIR bandpass filter.
        Streamlined to be fast - for use in antialiasing etc.
        Tries to construct a filter of order 7, with critical bands at +-0.002 Fn.
        This corresponds to +- 16 Hz or so.
        If single-stage polynomial is unstable,
        switches to order 30, second-order filter.
        Args:
        1. data (None means use the data held by the object).
        2-3. Low and high pass frequencies in fraction of Nyquist
        (None means 0 and 1 respectively).

        Does single-pass filtering, so does not retain phase.
        """
        if data is None:
            data = self.data
        if low is None:
            low = 0
        if high is None:
            # the default used to crash on None+0.002; None now means "up to Nyquist"
            high = 1

        # widen the band slightly and clip to the valid [0, 1] range:
        lowPass = max(low-0.002, 0)
        highPass = min(high+0.002, 1)

        if lowPass == 0 and highPass == 1:
            print("No filter needed!")
            return data
        elif lowPass == 0:
            # Low pass
            btype = 'lowpass'
            wN = highPass
        elif highPass == 1:
            # High pass
            btype = 'highpass'
            wN = lowPass
        else:
            # Band pass
            btype = 'bandpass'
            wN = [lowPass, highPass]
        b, a = signal.butter(7, wN, btype=btype)

        # check if filter is stable
        filterUnstable = np.any(np.abs(np.roots(a)) > 1)
        if filterUnstable:
            # redesign to SOS and filter.
            # uses order=30 because why not
            print("single-stage filter unstable, switching to SOS filtering")
            sos = signal.butter(30, wN, btype=btype, output='sos')
            # do the actual filtering
            data = signal.sosfilt(sos, data)
        else:
            data = signal.lfilter(b, a, data)

        return data

    # The next functions perform spectrogram inversion
    def invertSpectrogram(self, sg, window_width=256, incr=64, nits=10):
        """ Iteratively estimate a waveform whose spectrogram matches sg.

        Assumes that sg is the plain (not power), one-sided spectrogram.
        Runs nits refinement iterations; note that each iteration overwrites
        self.data and self.sg via setData() and spectrogram().
        Returns the real part of the final estimate.
        """
        # Make the spectrogram two-sided
        sg = np.concatenate([sg, sg[:, ::-1]], axis=1)

        sg_best = copy.deepcopy(sg)
        for i in range(nits):
            invertedSgram = self.inversion_iteration(sg_best, incr, calculate_offset=True, set_zero_phase=(i == 0))
            self.setData(invertedSgram)
            est = self.spectrogram(window_width, incr, onesided=False, need_even=True)
            # the floor of max(sg)/1e8 keeps the division away from zero
            phase = est / np.maximum(np.max(sg)/1E8, np.abs(est))
            sg_best = sg * phase[:len(sg)]
        invertedSgram = self.inversion_iteration(sg_best, incr, calculate_offset=True, set_zero_phase=False)
        return np.real(invertedSgram)

    def inversion_iteration(self, sg, incr, calculate_offset=True, set_zero_phase=True, window='Hann'):
        """
        One overlap-add pass turning a two-sided spectrogram sg into a waveform.

        window is a window name (see _windowFunction; unknown names fall back
        to Hann) - anything that is not a string is used as the window as given.

        Under MSR-LA License
        Based on MATLAB implementation from Spectrogram Inversion Toolbox
        References
        ----------
        D. Griffin and J. Lim. Signal estimation from modified
        short-time Fourier transform. IEEE Trans. Acoust. Speech
        Signal Process., 32(2):236-243, 1984.
        Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
        Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
        Adelaide, 1994, II.77-80.
        Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
        Estimation from Modified Short-Time Fourier Transform
        Magnitude Spectra. IEEE Transactions on Audio Speech and
        Language Processing, 08/2007.
        """
        size = int(np.shape(sg)[1] // 2)
        # Getting overflow warnings with 32 bit...
        wave = np.zeros((np.shape(sg)[0] * incr + size), dtype='float64')
        total_windowing_sum = np.zeros((np.shape(sg)[0] * incr + size))

        # Virginia: adding different windows
        if isinstance(window, str):
            # previously only 'Hann' and 'Blackman' were handled and any other
            # name crashed when the string was multiplied with the samples
            window = self._windowFunction(window, size)

        est_start = int(size // 2) - 1
        est_end = est_start + size
        for i in range(sg.shape[0]):
            wave_start = int(incr * i)
            wave_end = wave_start + size
            if set_zero_phase:
                spectral_slice = sg[i].real + 0j
            else:
                # already complex
                spectral_slice = sg[i]

            wave_est = np.real(fft.ifft(spectral_slice))[::-1]
            if calculate_offset and i > 0:
                offset_size = size - incr
                if offset_size <= 0:
                    print("WARNING: Large step size >50% detected! "
                          "This code works best with high overlap - try "
                          "with 75% or greater")
                    offset_size = incr
                offset = self.xcorr_offset(wave[wave_start:wave_start + offset_size], wave_est[est_start:est_start + offset_size])
            else:
                offset = 0
            wave[wave_start:wave_end] += window * wave_est[est_start - offset:est_end - offset]
            total_windowing_sum[wave_start:wave_end] += window**2  # Virginia: needed square
        wave = np.real(wave) / (total_windowing_sum + 1E-6)
        return wave

    def xcorr_offset(self, x1, x2):
        """ Return the lag (relative to len(x1)) at which the cross-correlation
        of the mean-removed x1 and x2 peaks; the outer half-frames are excluded.
        """
        x1 = x1 - x1.mean()
        x2 = x2 - x2.mean()
        frame_size = len(x2)
        half = frame_size // 2
        corrs = np.convolve(x1.astype('float32'), x2[::-1].astype('float32'))
        corrs[:half] = -1E30
        corrs[-half:] = -1E30
        return corrs.argmax() - len(x1)

    def medianFilter(self, data=None, width=11):
        """ Median Filtering
        Sample i becomes the median of data[i-width:i+width].
        Uses smaller width windows at edges to remove edge effects; the very
        first sample has no room for a window and is copied through unchanged
        (it used to come out as NaN from the median of an empty slice).
        data defaults to the data held by the object. Returns a new float array.
        """
        # TODO: Use abs rather than pure median?
        if data is None:
            data = self.data
        n = len(data)
        mData = np.zeros(n)
        for i in range(n):
            wid = min(i, n - i, width)
            if wid == 0:
                mData[i] = data[i]
            else:
                mData[i] = np.median(data[i - wid:i + wid])
        return mData

    # Could be either features or signal processing things. Anyway, they are here -- spectral derivatives and extensions
    def wiener_entropy(self, sg):
        """ Per-row Wiener entropy: mean of log(sg) minus log of mean(sg), along axis 1. """
        return np.sum(np.log(sg), 1)/np.shape(sg)[1] - np.log(np.sum(sg, 1)/np.shape(sg)[1])

    def mean_frequency(self, sampleRate, timederiv, freqderiv):
        """ Return (freqs, mf): the frequency of each bin, and for each row the
        mean frequency weighted by timederiv**2 + freqderiv**2.
        """
        freqs = sampleRate//2 / np.shape(timederiv)[1] * (np.arange(np.shape(timederiv)[1])+1)
        mfd = np.sum(timederiv**2 + freqderiv**2, axis=1)
        # avoid dividing by zero for rows without any energy
        mfd = np.where(mfd == 0, 1, mfd)
        mf = np.sum(freqs * (timederiv**2 + freqderiv**2), axis=1)/mfd
        return freqs, mf

    def goodness_of_pitch(self, spectral_deriv, sg):
        """ Maximum over axis 0 of the magnitude of the FFT (along axis 0) of spectral_deriv/sg. """
        return np.max(np.abs(fft.fft(spectral_deriv/sg, axis=0)), axis=0)

    def spectral_derivative(self, window_width, incr, K=2, threshold=0.5, returnAll=False):
        """ Compute the spectral derivative from K multi-tapered spectrograms of self.data.

        Returns the (left-right flipped) binary contour image, or, if returnAll
        is set, the tuple (spectral_deriv, sg, fm, we, mf, flipped contours).
        Returns None if there is no audio data.
        """
        if self.data is None or len(self.data) == 0:
            print("ERROR: attempted to calculate spectrogram without audiodata")
            return

        # Compute the set of multi-tapered spectrograms
        starts = range(0, len(self.data) - window_width, incr)
        [tapers, eigen] = dpss(window_width, 2.5, K)
        sg = np.zeros((len(starts), window_width, K), dtype=complex)
        for k in range(K):
            for i in starts:
                sg[i // incr, :, k] = tapers[:, k] * self.data[i:i + window_width]
            sg[:, :, k] = fft.fft(sg[:, :, k])
        sg = sg[:, window_width//2:, :]

        # Spectral derivative is the real part of exp(i \phi) \sum_ k s_k conj(s_{k+1}) where s_k is the k-th tapered spectrogram
        # and \phi is the direction of maximum change (tan inverse of the ratio of pure time and pure frequency components)
        S = np.sum(sg[:, :, :-1]*np.conj(sg[:, :, 1:]), axis=2)
        timederiv = np.real(S)
        freqderiv = np.imag(S)

        # Frequency modulation is the angle $\pi/2 - direction of max change$
        mfd = np.max(freqderiv**2, axis=0)
        mfd = np.where(mfd == 0, 1, mfd)
        fm = np.arctan(np.max(timederiv**2, axis=0) / mfd)
        spectral_deriv = -timederiv*np.sin(fm) + freqderiv*np.cos(fm)

        sg = np.sum(np.real(sg*np.conj(sg)), axis=2)
        sg /= np.max(sg)

        # Suppress the noise (spectral continuity)

        # Compute the zero crossings of the spectral derivative in all directions
        # Pixel is a contour pixel if it is at a zero crossing and both neighbouring pixels in that direction are > threshold
        sdt = spectral_deriv * np.roll(spectral_deriv, 1, 0)
        sdf = spectral_deriv * np.roll(spectral_deriv, 1, 1)
        sdtf = spectral_deriv * np.roll(spectral_deriv, 1, (0, 1))
        sdft = spectral_deriv * np.roll(spectral_deriv, (1, -1), (0, 1))
        indt, indf = np.where(((sdt < 0) | (sdf < 0) | (sdtf < 0) | (sdft < 0)) & (spectral_deriv < 0))

        # Noise reduction using a threshold
        we = np.abs(self.wiener_entropy(sg))
        freqs, mf = self.mean_frequency(self.sampleRate, timederiv, freqderiv)

        # Given a time and frequency bin
        nT, nF = np.shape(sg)
        contours = np.zeros(np.shape(spectral_deriv))
        for t, f in zip(indt, indf):
            # border pixels have no neighbours on one side
            if 0 < t < nT - 1 and 0 < f < nF - 1:
                thr = threshold*we[t]/np.abs(freqs[f] - mf[t])
                if sdt[t, f] < 0 and sg[t-1, f] > thr and sg[t+1, f] > thr:
                    contours[t, f] = 1
                if sdf[t, f] < 0 and sg[t, f-1] > thr and sg[t, f+1] > thr:
                    contours[t, f] = 1
                if sdtf[t, f] < 0 and sg[t-1, f-1] > thr and sg[t+1, f+1] > thr:
                    contours[t, f] = 1
                # anti-diagonal: the second neighbour is (t+1, f-1); the same
                # neighbour used to be tested twice
                if sdft[t, f] < 0 and sg[t-1, f+1] > thr and sg[t+1, f-1] > thr:
                    contours[t, f] = 1

        if returnAll:
            return spectral_deriv, sg, fm, we, mf, np.fliplr(contours)
        else:
            return np.fliplr(contours)

    def drawSpectralDeriv(self):
        """ Helper function to parse output for plotting spectral derivs.
        Returns x, y positions of the contour pixels inside the shown frequency
        range; reads the shape of self.sg, so spectrogram() must have run.
        """
        sd = self.spectral_derivative(self.window_width, self.incr, 2, 5.0)
        x, y = np.where(sd > 0)

        # remove points beyond frq range to show
        y1 = [i * self.sampleRate//2/np.shape(self.sg)[1] for i in y]
        y1 = np.asarray(y1)
        valminfrq = self.minFreqShow/(self.sampleRate//2/np.shape(self.sg)[1])

        inds = np.where((y1 >= self.minFreqShow) & (y1 <= self.maxFreqShow))
        x = x[inds]
        y = y[inds]
        y = [i - valminfrq for i in y]

        return x, y

    def drawFundFreq(self, seg):
        """ Produces marks of fundamental freq to be drawn on the spectrogram.
        seg must provide yin(), convert01(), deleteShort() and joinGaps().
        Returns a list of (x positions, y positions) per segment, or None when
        no pitch falls inside the shown frequency range.
        """
        pitch, starts, _, W = seg.yin()
        # find out which marks should be visible
        ind = np.logical_and(pitch > self.minFreqShow+50, pitch < self.maxFreqShow)
        if not np.any(ind):
            print("Warning: no fund. freq. identified in this page")
            return

        # ffreq is calculated over windows of size W
        # first, identify segments using that original scale:
        segs = seg.convert01(ind)
        segs = seg.deleteShort(segs, 2)
        segs = seg.joinGaps(segs, 2)
        # extra round to delete those which didn't merge with any longer segments
        segs = seg.deleteShort(segs, 4)

        yadjfact = 2/self.sampleRate*np.shape(self.sg)[1]

        # then map starts from samples to spec windows
        starts = starts / self.incr

        # then convert segments back to positions in each array:
        out = []
        for s in segs:
            # convert [s, e] to [s s+1 ... e-1 e]
            i = np.arange(s[0], s[1])
            # retrieve all pitch and start positions corresponding to this segment
            pitchSeg = pitch[i]
            # Adjust pitch marks to the visible freq range on the spec
            y = ((pitchSeg-self.minFreqShow)*yadjfact).astype('int')
            # smooth the pitch lines
            medfiltsize = min((len(y)-1)//2*2+1, 15)
            y = medfilt(y, medfiltsize)
            # joinGaps can introduce no-pitch pixels, which cause
            # smoothed segments to have 0 ends. Trim those:
            trimst = 0
            while y[trimst] == 0 and trimst < medfiltsize//2:
                trimst += 1
            trime = len(y)-1
            while y[trime] == 0 and trime > len(y)-medfiltsize//2:
                trime -= 1
            # NOTE(review): this slice always drops the last kept element as
            # well (trime is an index, the slice end is exclusive) - confirm
            # whether that is intended.
            y = y[trimst:trime]
            i = i[trimst:trime]
            out.append((starts[i], y))
        return out

    def max_energy(self, sg, thr=1.2):
        """ Mark the position of maximum energy in each row of sg and return the
        x, y positions of the marks inside the shown frequency range.
        thr is only used by the commented-out alternative below.
        """
        # Remember that spectrogram is actually rotated!

        colmaxinds = np.argmax(sg, axis=1)

        points = np.zeros(np.shape(sg))

        # If one wants to show only some colmaxs:
        # sg = sg/np.max(sg)
        # colmedians = np.median(sg, axis=1)
        # colmax = np.max(sg,axis=1)
        # inds = np.where(colmax>thr*colmedians)
        # print(len(inds))
        # points[inds, colmaxinds[inds]] = 1

        # just mark the argmax position in each column
        points[range(points.shape[0]), colmaxinds] = 1

        x, y = np.where(points > 0)

        # convert points y coord from spec units to Hz
        yfr = [i * self.sampleRate//2/np.shape(self.sg)[1] for i in y]
        yfr = np.asarray(yfr)

        # remove points beyond frq range to show
        inds = np.where((yfr >= self.minFreqShow) & (yfr <= self.maxFreqShow))
        x = x[inds]
        y = y[inds]

        # adjust y pos for when spec doesn't start at 0
        specstarty = self.minFreqShow / (self.sampleRate // 2 / np.shape(self.sg)[1])
        y = [i - specstarty for i in y]

        return x, y

    def denoiseImage(self, sg, thr=1.2):
        """ Denoise the spectrogram image with total-variation (Chambolle) denoising.
        thr is not used.
        """
        from skimage.restoration import (denoise_tv_chambolle, denoise_bilateral, denoise_wavelet, estimate_sigma)
        # NOTE(review): sigma_est is computed but never used afterwards.
        sigma_est = estimate_sigma(sg, multichannel=False, average_sigmas=True)
        sgnew = denoise_tv_chambolle(sg, weight=0.2, multichannel=False)
        #sgnew = denoise_bilateral(sg, sigma_color=0.05, sigma_spatial=15, multichannel=False)
        #sgnew = denoise_wavelet(sg, multichannel=False)

        return sgnew

    def denoiseImage2(self, sg, filterSize=5):
        """ Median-filter the spectrogram image with a filterSize x filterSize box.
        Filter size is odd. A border is copied over from the input first; interior
        pixels are then replaced by the median of their neighbourhood.
        Prints and returns the new image.
        """
        [x, y] = np.shape(sg)
        width = filterSize//2

        sgnew = np.zeros(np.shape(sg))
        sgnew[0:width+1, :] = sg[0:width+1, :]
        sgnew[-width:, :] = sg[-width:, :]
        sgnew[:, 0:width+1] = sg[:, 0:width+1]
        sgnew[:, -width:] = sg[:, -width:]

        for i in range(width, x-width):
            for j in range(width, y-width):
                sgnew[i, j] = np.median(sg[i-width:i+width+1, j-width:j+width+1])
        print(sgnew)
        return sgnew

    def mark_rain(self, sg, thr=0.9):
        """ Presumably marks long runs of near-maximum energy (rain) in sg.

        NOTE(review): left as found because the intended indexing cannot be
        determined from the code. As written, r restarts at 0 for every c,
        inds[1][r + 1] can run past the end of inds, j is never advanced, and
        the final slice uses float indices - please review before relying on it.
        """
        row, col = np.shape(sg.T)
        print(row, col)
        inds = np.where(sg > thr * np.max(sg))
        longest = np.zeros(col)
        start = np.zeros(col)
        for c in range(col):
            r = 0
            l = 0
            s = 0
            j = 0
            while inds[0][r] == c:
                if inds[1][r + 1] == inds[1][r] + 1:
                    l += 1
                else:
                    if l > longest[c]:
                        longest[c] = l
                        start[c] = s
                    l = 0
                    s = j + 1
                r += 1
        newsg = np.zeros(np.shape(sg))
        newsg = newsg.T
        for c in range(col):
            if longest[c] > 10:
                newsg[c, start[c]:start[c] + longest[c]] = 1
        print(longest)
        return newsg.T

    def denoise(self, alg, start=None, end=None, width=None):
        """ Denoise self.data in place with the chosen algorithm.

        alg - string, algorithm type from the Denoise dialog
        start, end - filtering limits, from Denoise dialog
        width - median parameter, from Denoise dialog
        """
        if str(alg) == "Wavelets":
            print("Don't use this interface for wavelets")
            return
        elif str(alg) == "Bandpass":
            self.data = self.bandpassFilter(self.data, self.sampleRate, start=start, end=end)
        elif str(alg) == "Butterworth Bandpass":
            self.data = self.ButterworthBandpass(self.data, self.sampleRate, low=start, high=end)
        else:
            # Median Filter
            self.data = self.medianFilter(self.data, int(str(width)))

    def impMask(self, engp=90, fp=0.75):
        """
        Impulse mask
        :param engp: energy percentile (for rows of the spectrogram)
        :param fp: frequency proportion to consider it as an impulse (cols of the spectrogram)
        :return: audiodata
        """
        print('Impulse masking...')
        imps = self.impulse_cal(fs=self.sampleRate, engp=engp, fp=fp)
        print('Samples to mask: ', len(self.data) - np.sum(imps))
        # Mask only the affected samples
        return np.multiply(self.data, imps)

    def impulse_cal(self, fs, engp=90, fp=0.75, blocksize=10):
        """
        Find sections where impulse sounds occur e.g. clicks
        fs - sampling rate, used to choose the window length (no overlap)
        engp - energy percentile (thr), the percentile of energy to inform that a section got high energy across
            frequency bands
        fp - frequency percentage (thr), the percentage of frequency bands to have high energy to mark a section
            as having impulse noise
        blocksize - max number of consecutive blocks, 10 consecutive blocks (~1/25 sec) is a good value, to not
            to mask very close-range calls
        :return: a binary list (or array, if padding was needed) of length len(data)
            indicating presence of impulsive noise (0) otherwise (1)
        """
        # Calculate window length: the power of two (32..1024) closest to fs/250
        w1 = np.floor(fs/250)  # Window length of 1/250 sec selected experimentally
        arr = [2 ** i for i in range(5, 11)]
        pos = np.abs(np.asarray(arr) - w1).argmin()
        window = arr[pos]

        sp = SignalProc(window, window)  # No overlap
        sp.data = self.data
        sp.sampleRate = self.sampleRate
        sg = sp.spectrogram(multitaper=False)

        # For each frq band get sections where energy exceeds some (90%) percentile, engp
        # and generate a binary spectrogram
        ep = np.percentile(sg, engp, axis=0)  # note thr - 90% for energy percentile
        sgb = sg > ep

        # If lots of frq bands got 1 then predict a click
        # 1 - presence of impulse noise, 0 - otherwise here
        impulse = np.where(np.count_nonzero(sgb, axis=1) > np.shape(sgb)[1] * fp, 1, 0)  # Note thr fp

        # When an impulsive noise detected, it's better to check neighbours to make sure its not a bird call
        # very close to the microphone.
        imp_inds = np.where(impulse > 0)[0].tolist()
        imp = self.countConsecutive(imp_inds, len(impulse))

        # Note threshold - blocksize, 10 consecutive blocks ~1/25 sec:
        # longer runs and untouched blocks are kept (1), short runs are masked (0)
        impulse = [1 if (item > blocksize or item == 0) else 0 for item in imp]

        # one value per block -> one value per sample
        impulse = list(chain.from_iterable(repeat(e, window) for e in impulse))

        # Make it same length as self.data
        if len(impulse) > len(self.data):  # Sanity check
            impulse = impulse[0:len(self.data)]
        elif len(impulse) < len(self.data):
            gap = len(self.data) - len(impulse)
            # NOTE(review): the tail is padded with 0, i.e. it will be masked -
            # confirm this is intended.
            impulse = np.pad(impulse, (0, gap), 'constant')
        return impulse

    def countConsecutive(self, nums, length):
        """ For a sorted list of indices nums, return a list of the given length
        where each listed index holds the size of the consecutive run it belongs
        to, and every other position holds 0.
        """
        # pairs of (last index of one run, first index of the next run)
        gaps = [[s, e] for s, e in zip(nums, nums[1:]) if s + 1 < e]
        # flatten into run start, run end, run start, run end, ...
        edges = iter(nums[:1] + sum(gaps, []) + nums[-1:])
        edges = list(zip(edges, edges))
        edges_reps = [item[1] - item[0] + 1 for item in edges]
        res = np.zeros((length)).tolist()
        t = 0
        for item in edges:
            for i in range(item[0], item[1]+1):
                res[i] = edges_reps[t]
            t += 1
        return res