def signal_energy(signal, fs, fl=0.05, fh=0.02):
    """Return the summed STFT magnitude of ``signal`` along axis 1.

    The signal is transformed with ``stft`` and the absolute values of the
    resulting coefficients are added up along axis 1, which yields one value
    per row of the ``stft`` output.

    Args:
        signal: Samples to analyse; forwarded unchanged to ``stft``.
        fs: Forwarded to ``stft`` (presumably the sampling rate in Hz --
            confirm against ``stft``).
        fl: Forwarded to ``stft`` (presumably the frame length in seconds --
            confirm against ``stft``).
        fh: Forwarded to ``stft`` (presumably the frame hop in seconds --
            confirm against ``stft``).

    Returns:
        Array with the sum of coefficient magnitudes for each ``stft`` row.
    """
    # NumPy is called directly: ``scipy.absolute`` and ``scipy.sum`` are
    # deprecated re-exports of these very functions in the scipy namespace.
    import numpy as np

    spectrum = stft(signal, fs, fl, fh)
    return np.sum(np.absolute(spectrum), axis=1)
def signal_energy(signal, fs, fl=0.05, fh=0.02):
    """Return the summed STFT magnitude of ``signal`` along axis 1.

    The signal is transformed with ``stft`` and the absolute values of the
    resulting coefficients are added up along axis 1, which yields one value
    per row of the ``stft`` output.

    Args:
        signal: Samples to analyse; forwarded unchanged to ``stft``.
        fs: Forwarded to ``stft`` (presumably the sampling rate in Hz --
            confirm against ``stft``).
        fl: Forwarded to ``stft`` (presumably the frame length in seconds --
            confirm against ``stft``).
        fh: Forwarded to ``stft`` (presumably the frame hop in seconds --
            confirm against ``stft``).

    Returns:
        Array with the sum of coefficient magnitudes for each ``stft`` row.
    """
    # NumPy is called directly: ``scipy.absolute`` and ``scipy.sum`` are
    # deprecated re-exports of these very functions in the scipy namespace.
    import numpy as np

    spectrum = stft(signal, fs, fl, fh)
    return np.sum(np.absolute(spectrum), axis=1)
def computeSpectrogram(self, timeseries, nFFT, nHop, dt, windowFn, fRange):
    '''
    Compute the STFT spectrogram of every signal held by ``timeseries``.

    Parameters:
        timeseries: object exposing ``getData()`` and ``numSignals()``.
            The data is indexed as ``data[:, i]``, i.e. one column per
            signal (nSamples, nSignals).
        nFFT, nHop: forwarded positionally to ``spectrogram.stft``
            (presumably FFT length and hop size -- confirm against that
            helper).
        dt: not used by this method; kept so existing callers keep working.
        windowFn: forwarded to ``spectrogram.stft`` as ``win``.
        fRange: ``None`` to keep every frequency bin, otherwise a
            ``(low, high)`` pair; only bins whose entry in
            ``self.getFreqVec()`` lies within ``[low, high]`` are kept.

    Returns:
        spectra, complex array (nTimes, nFreq, nSignals)

    Raises:
        ValueError: if ``timeseries`` reports no signals.
    '''
    data = timeseries.getData()
    fVec = self.getFreqVec()

    if fRange is None:
        # Keep every frequency bin.
        keep = np.ones(len(fVec), dtype=bool)
    else:
        keep = np.logical_and(fVec >= fRange[0], fVec <= fRange[1])
    # Integer indices of the retained bins.
    binIdx = np.nonzero(keep)[0]

    nSignals = timeseries.numSignals()
    if nSignals < 1:
        # Without at least one STFT the number of time frames is unknown,
        # so there is no sensible output shape; fail with a clear message
        # instead of an UnboundLocalError at the return statement.
        raise ValueError("timeseries contains no signals")

    spectra = None
    # Loop over signals (2nd dimension of data)
    for i in range(nSignals):
        x = data[:, i, np.newaxis]
        X = spectrogram.stft(x, nFFT, nHop, transform=np.fft.fft,
                             win=windowFn, zp_back=0, zp_front=0)
        if spectra is None:
            # The first transform tells us how many time frames to allocate.
            spectra = np.zeros((X.shape[0], len(binIdx), nSignals),
                               dtype=complex)
        spectra[:, :, i] = X[:, binIdx]

    return spectra
def transform(audio_data, save_image_path, nFFT=256, overlap=0.75):
    '''Save the log-magnitude spectrogram of ``audio_data`` as an RGB image.

    audio_data: signals to convert
    save_image_path: path to store the image file
    nFFT, overlap: forwarded to ``stft`` together with ``audio_data``

    Returns the size of the first axis of the spectrogram, taken before the
    array is transposed for the image.
    '''
    # spectrogram magnitude, limited from below to 1/10000 of its peak
    magnitude = np.abs(stft(audio_data, nFFT, overlap))
    floor = np.max(magnitude) / 10000
    level = 20. * np.log10(np.maximum(magnitude, floor) / 1e-4)
    n_rows = level.shape[0]

    # whole-number levels, transposed for the image
    pixels = np.transpose(np.round(level))
    assert np.max(pixels) < 256, 'spectrogram value too large'

    # save the image
    Image.fromarray(pixels).convert('RGB').save(save_image_path)
    return n_rows
def find_words(data, fs, num, **args):
    """Cut ``data`` into its ``num`` longest above-threshold segments.

    Frame energy is the sum of STFT magnitudes along axis 1 of the ``stft``
    output (``stft`` is called with frame arguments 0.03 and 0.02; the
    second one is used here as the frame step in seconds).  Frames whose
    energy exceeds the threshold are grouped into segments, segments
    separated by a short gap are merged, and only the ``num`` longest
    segments are kept.

    Args:
        data: Samples to split; must support slicing and ``len``.
        fs: Sampling rate used to convert frame indices to sample indices.
        num: Number of segments to return.
        **args: Optional settings:
            ``min_silence_length`` (default 5): gaps of at most this many
                frames are merged into the neighbouring segments.
            ``plot`` (default False): draw the energy and the signal with
                the segment borders using ``pylab``.
            ``thresh``: energy threshold; defaults to the mean energy minus
                the minimum energy.

    Returns:
        List of ``num`` slices of ``data`` in their original order.

    Raises:
        ValueError: if segment starts and ends do not pair up, or if fewer
            than ``num`` segments remain after merging.
    """
    min_silence_length = args.get("min_silence_length", 5)
    show_plot = args.get("plot", False)
    frml = 0.03
    frmh = 0.02

    energy = np.sum(np.absolute(stft(data, fs, frml, frmh)), 1)
    if "thresh" in args:
        thresh = args["thresh"]
    else:
        thresh = np.mean(energy) - np.min(energy)

    # 0/1 activity per frame; padding with a 0 on either side makes a
    # segment touching the first or last frame produce an edge as well.
    active = (energy > thresh).astype(np.int8)
    rising_edge = np.diff(np.hstack(([0], active)))
    falling_edge = np.diff(np.hstack((active, [0])))
    (stp,) = np.nonzero(rising_edge > 0)
    (edp,) = np.nonzero(falling_edge < 0)
    if len(stp) != len(edp):
        raise ValueError("Wrong threshold value")

    # Remove too short silence periods: dropping the end of one segment and
    # the start of the next one merges the two.
    silence = stp[1:] - edp[:-1]
    (too_short,) = np.nonzero(silence <= min_silence_length)
    stp = np.delete(stp, too_short + 1)
    edp = np.delete(edp, too_short)

    nonsilence = edp - stp
    if len(nonsilence) < num:
        raise ValueError("You say less words than you ask me to find or you speak too fast")

    # Remove too short nonsilence.  The slice end is spelled out because
    # ``shortest[:-num]`` is empty for ``num == 0`` and would keep every
    # segment instead of none.
    shortest = nonsilence.argsort()
    surplus = shortest[:len(shortest) - num]
    stp = np.delete(stp, surplus)
    edp = np.delete(edp, surplus)

    def pos(frame):
        # Frame index -> sample index.  Slice bounds have to be integers,
        # so the product is truncated explicitly.
        return int(fs * frmh * frame)

    if show_plot:
        pylab.subplot(2, 1, 1)
        etime = np.arange(len(energy), dtype=float) * frmh
        pylab.plot(etime, energy / 1e6)
        pylab.axhline(thresh / 1e6)
        for x in stp:
            pylab.axvline(x * frmh, color="red")
        for x in edp:
            pylab.axvline(x * frmh, color="green")
        pylab.title("Energia wypowiedzi w czasie")

        sample_time = np.arange(len(data), dtype=float) / fs
        sc_data = data - np.mean(data)
        sc_data = sc_data / np.max(sc_data)
        pylab.subplot(2, 1, 2)
        pylab.plot(sample_time, sc_data)
        # Border time in seconds is frame index times frame step.
        for x in stp:
            pylab.axvline(x * frmh, color="red")
        for x in edp:
            pylab.axvline(x * frmh, color="green")
        pylab.xlabel("Czas [s]")
        pylab.title(u"Zapis sygnału")
        pylab.show()

    return [data[pos(a):pos(b)] for a, b in zip(stp, edp)]
def find_words(data, fs, num, **args):
    """Cut ``data`` into its ``num`` longest above-threshold segments.

    Frame energy is the sum of STFT magnitudes along axis 1 of the ``stft``
    output (``stft`` is called with frame arguments 0.03 and 0.02; the
    second one is used here as the frame step in seconds).  Frames whose
    energy exceeds the threshold are grouped into segments, segments
    separated by a short gap are merged, and only the ``num`` longest
    segments are kept.

    Args:
        data: Samples to split; must support slicing and ``len``.
        fs: Sampling rate used to convert frame indices to sample indices.
        num: Number of segments to return.
        **args: Optional settings:
            ``min_silence_length`` (default 5): gaps of at most this many
                frames are merged into the neighbouring segments.
            ``plot`` (default False): draw the energy and the signal with
                the segment borders using ``pylab``.
            ``thresh``: energy threshold; defaults to the mean energy minus
                the minimum energy.

    Returns:
        List of ``num`` slices of ``data`` in their original order.

    Raises:
        ValueError: if segment starts and ends do not pair up, or if fewer
            than ``num`` segments remain after merging.
    """
    min_silence_length = args.get("min_silence_length", 5)
    show_plot = args.get("plot", False)
    frml = 0.03
    frmh = 0.02

    energy = np.sum(np.absolute(stft(data, fs, frml, frmh)), 1)
    if "thresh" in args:
        thresh = args["thresh"]
    else:
        thresh = np.mean(energy) - np.min(energy)

    # 0/1 activity per frame; padding with a 0 on either side makes a
    # segment touching the first or last frame produce an edge as well.
    active = (energy > thresh).astype(np.int8)
    rising_edge = np.diff(np.hstack(([0], active)))
    falling_edge = np.diff(np.hstack((active, [0])))
    (stp,) = np.nonzero(rising_edge > 0)
    (edp,) = np.nonzero(falling_edge < 0)
    if len(stp) != len(edp):
        raise ValueError("Wrong threshold value")

    # Remove too short silence periods: dropping the end of one segment and
    # the start of the next one merges the two.
    silence = stp[1:] - edp[:-1]
    (too_short,) = np.nonzero(silence <= min_silence_length)
    stp = np.delete(stp, too_short + 1)
    edp = np.delete(edp, too_short)

    nonsilence = edp - stp
    if len(nonsilence) < num:
        raise ValueError("You say less words than you ask me to find or you speak too fast")

    # Remove too short nonsilence.  The slice end is spelled out because
    # ``shortest[:-num]`` is empty for ``num == 0`` and would keep every
    # segment instead of none.
    shortest = nonsilence.argsort()
    surplus = shortest[:len(shortest) - num]
    stp = np.delete(stp, surplus)
    edp = np.delete(edp, surplus)

    def pos(frame):
        # Frame index -> sample index.  Slice bounds have to be integers,
        # so the product is truncated explicitly.
        return int(fs * frmh * frame)

    if show_plot:
        pylab.subplot(2, 1, 1)
        etime = np.arange(len(energy), dtype=float) * frmh
        pylab.plot(etime, energy / 1e6)
        pylab.axhline(thresh / 1e6)
        for x in stp:
            pylab.axvline(x * frmh, color="red")
        for x in edp:
            pylab.axvline(x * frmh, color="green")
        pylab.title("Energia wypowiedzi w czasie")

        sample_time = np.arange(len(data), dtype=float) / fs
        sc_data = data - np.mean(data)
        sc_data = sc_data / np.max(sc_data)
        pylab.subplot(2, 1, 2)
        pylab.plot(sample_time, sc_data)
        # Border time in seconds is frame index times frame step.
        for x in stp:
            pylab.axvline(x * frmh, color="red")
        for x in edp:
            pylab.axvline(x * frmh, color="green")
        pylab.xlabel("Czas [s]")
        pylab.title(u"Zapis sygnału")
        pylab.show()

    return [data[pos(a):pos(b)] for a, b in zip(stp, edp)]