def frame_wceps(self, frame_in):
    """Returns warped cepstral coefficients (warped MFCCs) for a single frame."""
    # Calculate Blackman-windowed cepstrum
    if self.window is None:
        self.window = scipy.blackman(frame_in.shape[0] + 1)[:-1]
        self.window /= np.sum(self.window)
    windowed_frame = self.window * frame_in.flatten()
    windowed_frame -= np.mean(windowed_frame)

    # Power spectrum and real cepstrum
    spectrum = np.fft.fft(windowed_frame)
    power_spec = np.real(spectrum) ** 2 + np.imag(spectrum) ** 2
    cepstrum = np.fft.ifft(np.log(power_spec + 1e-5))

    # Scale the first and the middle (Nyquist) coefficients by 0.5
    cepstrum = np.real(cepstrum.reshape(1, cepstrum.shape[0]))
    cepstrum[:, 0] *= 0.5
    cepstrum[:, len(self.window) // 2] *= 0.5

    # Frequency-warp the cepstrum (warping factor 0.42)
    warped_cepstrum_fs = self.warper.warpSequence(
        [bk.FeatureSequence(cepstrum, True, True)],
        len(self.window) // 2, self.num_wceps, 0.42)[0]

    # Estimation-bias minimization; the result is currently unused (TODO: return it instead?)
    power_spec = power_spec.reshape(1, power_spec.shape[0])
    try:
        warped_cepstrum_fsOpt = self.warper.minimizeEstimationBias(
            warped_cepstrum_fs, bk.FeatureSequence(power_spec, True, True),
            5, 0.42)
    except ValueError:
        warped_cepstrum_fsOpt = warped_cepstrum_fs

    return warped_cepstrum_fs.getMatrix().flatten()
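# The cepstrum step in frame_wceps can be exercised on its own. A minimal
# sketch, assuming only NumPy (the external bk/warper objects are left out);
# the random test frame and its length are made up for illustration.
import numpy as np

frame = np.random.randn(512)                  # hypothetical audio frame
window = np.blackman(len(frame) + 1)[:-1]
window /= np.sum(window)

windowed = window * frame
windowed -= np.mean(windowed)

spectrum = np.fft.fft(windowed)
power_spec = spectrum.real ** 2 + spectrum.imag ** 2
cepstrum = np.real(np.fft.ifft(np.log(power_spec + 1e-5)))

# Halve the first and the Nyquist coefficient, as in frame_wceps
cepstrum[0] *= 0.5
cepstrum[len(window) // 2] *= 0.5
print(cepstrum[:5])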
def stft(x, fftsize, overlap, beta):
    """Short-time Fourier transform of x.

    beta == 0 selects a Blackman analysis window, otherwise a Hann window.
    `bins` and `tid` are assumed to be module-level frequency/time axes.
    """
    hop = fftsize // overlap  # integer hop size so it can be used as a range step
    if beta == 0:
        # fftsize + 1 samples with the last one dropped gives a periodic
        # window (better reconstruction)
        w = sc.blackman(fftsize + 1)[:-1]
    else:
        w = sc.hanning(fftsize + 1)[:-1]
    X = np.array([
        np.fft.rfft(w * x[i:i + fftsize])
        for i in range(0, len(x) - fftsize, hop)
    ])
    y = np.linspace(0, bins[-1], np.shape(X)[1])
    x = np.linspace(0, tid[-1], np.shape(X)[0])
    return X, x, y
def windowWeight(sig, window_size):
    """Apply a Blackman window to consecutive, non-overlapping chunks of sig."""
    output = np.array([], dtype=np.float64)
    w = scipy.blackman(window_size)
    last_full = len(sig) - window_size
    for i in range(0, last_full, window_size):
        output = np.append(output, w * sig[i:i + window_size])
    # Window the final window_size samples (may overlap the previous chunk)
    output = np.append(output, w * sig[len(sig) - window_size:len(sig)])
    return output
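# Hedged usage sketch for windowWeight: the signal length and window size are
# made-up illustration values, not part of the original code.
import numpy as np

sig = np.random.randn(1000)
weighted = windowWeight(sig, window_size=256)
# Three full 256-sample chunks plus the windowed final 256 samples
print(weighted.shape)  # (1024,)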
def extract_feats(self, frame_in):
    """Returns the mean-square energy of a Blackman-windowed, mean-removed frame."""
    # Calculate Blackman window on first use
    if self.window is None:
        self.window = scipy.blackman(frame_in.shape[0] + 1)[:-1]
        self.window /= np.sum(self.window)
    windowed_frame = self.window * frame_in.flatten()
    windowed_frame -= np.mean(windowed_frame)

    # Frame energy: mean of the squared windowed samples
    result_frames = np.array([np.mean(windowed_frame ** 2)])
    return result_frames.reshape(-1, 1)
def plot_spectrogram(waveform, sampling_rate, window_name, filename):
    """Plot a spectrogram of the waveform."""
    window_duration = 40.0 * 1.0e-3  # window length in seconds
    window_shift = 5.0 * 1.0e-3      # window shift (hop) in seconds
    window_size = int(window_duration * sampling_rate)  # window length in samples
    window_overlap = int((window_duration - window_shift) * sampling_rate)  # overlap of adjacent windows

    # Analysis window
    if window_name == "hanning":
        window = scipy.signal.windows.hann(window_size)      # Hann window
    elif window_name == "hamming":
        window = scipy.signal.windows.hamming(window_size)   # Hamming window
    elif window_name == "gaussian":
        # std was not specified in the original; window_size / 6 is an arbitrary choice
        window = scipy.signal.windows.gaussian(window_size, std=window_size / 6.0)
    elif window_name == "blackman":
        window = scipy.signal.windows.blackman(window_size)  # Blackman window
    elif window_name == "triangular":
        window = scipy.signal.windows.triang(window_size)    # triangular window
    elif window_name == "rectangular":
        window = scipy.signal.windows.boxcar(window_size)    # rectangular window
    else:
        print("The window function name is wrong.")
        exit()

    sp, freqs, times, ax = plt.specgram(waveform,
                                        NFFT=window_size,
                                        Fs=sampling_rate,
                                        window=window,
                                        noverlap=window_overlap)
    plt.title("Spectrogram [" + window_name + "] (" + filename + ")")
    plt.xlabel("Time [sec]")
    plt.ylabel("Frequency [Hz]")
    plt.xlim([0, times[-1]])
    plt.ylim([0, 5000])
    plt.savefig("graph/spectrogram/" + filename.split("/")[1].split(".")[0] +
                "_" + window_name + ".png")
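# Hedged usage sketch for plot_spectrogram: the WAV path, the use of
# scipy.io.wavfile, and the existing graph/spectrogram/ output directory are
# assumptions, not part of the original snippet.
from scipy.io import wavfile

sampling_rate, waveform = wavfile.read("data/sample.wav")  # hypothetical mono file
plot_spectrogram(waveform, sampling_rate, "blackman", "data/sample.wav")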
def __init__(self, originalFrameSizeMs, frameShiftMs, sampleRate, melCoeffCount,
             numReconstructionIterations=5, extraContext=0, cutoff=7900,
             normFactor=1.0, useLogMels=True, name='GriffinLim'):
    """Initializes three ring buffers, one for spectra and two for audio output."""
    super(GriffinLimSynthesis, self).__init__(name=name)

    self.useLogMels = useLogMels

    # Make sure no integer accidents happen
    frameSizeMs = float(originalFrameSizeMs)
    frameShiftMs = float(frameShiftMs)
    sampleRate = float(sampleRate)

    # Frame size and shift
    self.frameShiftMs = frameShiftMs
    self.sampleRate = sampleRate
    self.fftSize = int((frameSizeMs / 1000.0) * self.sampleRate)
    self.frameShift = int((frameShiftMs / 1000.0) * self.sampleRate)

    # Set block length accounting for overlap
    self.contextWidth = int(frameSizeMs / frameShiftMs)
    self.blockLen = self.contextWidth * 2 + 1 + extraContext

    # Length for ring buffers
    self.inputBufferLength = int(self.blockLen * 2.5)
    self.outputBufferLength = int(self.fftSize + self.frameShift * self.blockLen * 2.5)

    # Buffers and positions
    self.inputBuffer = []
    self.outputBuffer = []
    self.windowBuffer = []
    self.inputBufferPos = 0
    self.outputBufferPosMs = 0
    self.framePos = 0
    self.rfc = 0
    self.startTime = time.time()

    # Processing parameters
    self.normFactor = normFactor
    self.fftWindow = scipy.blackman(self.fftSize)
    self.numReconstructionIterations = numReconstructionIterations

    filterOrd = int((sampleRate / 1000.0) * frameShiftMs / 32.0)
    self.filterNumerator, self.filterDenominator = signal.iirfilter(
        filterOrd, float(cutoff) / float(sampleRate / 2), btype="lowpass")
    self.filterState = signal.lfiltic(self.filterNumerator,
                                      self.filterDenominator, np.array([]))

    specSize = int(int((frameSizeMs / 1000.0) * sampleRate) / 2 + 1)
    self.melFilter = mel.MelFilterBank(specSize, melCoeffCount, sampleRate)
def add_data(self, dataFrame, data_id=0):
    """Add a single frame of data, process it and potentially call callbacks.

    Buffers are allocated on the first call."""
    dataFrame = dataFrame.flatten()

    # Allocate buffers on first use
    if self.inputBuffer == []:
        self.inputBuffer = np.zeros((self.inputBufferLength, dataFrame.shape[0]))
        self.outputBuffer = np.zeros(self.outputBufferLength)
        self.windowBuffer = np.zeros(self.outputBufferLength)

    # Add frame to input buffer and advance framePos, the write pointer of the ring buffer
    self.inputBuffer[self.inputBufferPos] = dataFrame
    self.inputBufferPos = (self.inputBufferPos + 1) % self.inputBufferLength
    self.framePos += 1

    # Calculate last index in the output buffer
    previousOutputBufferPos = int((self.outputBufferPosMs / 1000.0) * self.sampleRate)

    # Compute new index in the output buffer
    self.outputBufferPosMs += self.frameShiftMs
    outputBufferPos = int((self.outputBufferPosMs / 1000.0) * self.sampleRate)
    framesShifted = outputBufferPos - previousOutputBufferPos

    # Keep both indices in the range of the ring buffers
    previousOutputBufferPos = previousOutputBufferPos % self.outputBufferLength
    outputBufferPos = outputBufferPos % self.outputBufferLength

    # Force the index of the previous position to be smaller than the current index
    if previousOutputBufferPos >= outputBufferPos:
        previousOutputBufferPos = previousOutputBufferPos - self.outputBufferLength

    # Nothing more to do until we have one complete block
    if self.framePos < self.blockLen - self.contextWidth:  # TODO TEST ME
        return np.array([])

    # Indices for one input block; wrap negative indices back into the ring buffer
    bufferIndices = list(range(self.inputBufferPos - self.blockLen + self.contextWidth,
                               self.inputBufferPos))
    bufferIndices = [x + self.inputBufferLength if x < 0 else x for x in bufferIndices]

    # Process block
    reconstructedAudioFrames = self.reconstructWavFromSpectrogram(
        self.inputBuffer[bufferIndices],   # frames in the block
        self.blockLen * self.frameShift)   # block length in samples

    # Zero out values in the buffer at the current buffer range
    newIndices = list(range(previousOutputBufferPos, outputBufferPos))
    newIndices = [x + self.outputBufferLength if x < 0 else x for x in newIndices]
    self.outputBuffer[newIndices] = np.zeros(len(newIndices))
    self.windowBuffer[newIndices] = np.zeros(len(newIndices))

    # Overlap-add the reconstructed frame; the window is accumulated in a
    # separate buffer so the overlap can be divided out when building output
    reconstructedIndices = list(range(outputBufferPos - reconstructedAudioFrames.shape[0],
                                      outputBufferPos))
    reconstructedIndices = [x + self.outputBufferLength if x < 0 else x
                            for x in reconstructedIndices]
    overlapWindow = scipy.blackman(reconstructedAudioFrames.shape[0])
    self.outputBuffer[reconstructedIndices] += reconstructedAudioFrames  # * overlapWindow
    self.windowBuffer[reconstructedIndices] += overlapWindow

    # Build finalized frame
    returnIndices = list(range(outputBufferPos - reconstructedAudioFrames.shape[0],
                               outputBufferPos - reconstructedAudioFrames.shape[0] + framesShifted))
    returnIndices = [x + self.outputBufferLength if x < 0 else x % self.outputBufferLength
                     for x in returnIndices]

    returnBuffer = self.outputBuffer[returnIndices]
    returnWindowBuffer = self.windowBuffer[returnIndices]
    # Normalize by the accumulated window sum where it is non-zero
    for i in range(len(returnBuffer)):
        if returnWindowBuffer[i] != 0:
            returnBuffer[i] = returnBuffer[i] / returnWindowBuffer[i]

    # Low-pass filter the output
    returnBuffer, self.filterState = signal.lfilter(self.filterNumerator,
                                                    self.filterDenominator,
                                                    returnBuffer,
                                                    zi=self.filterState)

    # Convert to int16 audio output and return
    self.rfc += len(returnBuffer)
    self.output_data(np.int16(np.clip(returnBuffer / (self.normFactor * 1.01),
                                      -0.99, 0.99) * (2 ** 15 - 1)))
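# The ring-buffer bookkeeping above boils down to overlap-add with a running
# window sum that is divided out afterwards. A minimal sketch of that idea in
# isolation, with made-up frame/hop sizes and random "reconstructed" frames:
import numpy as np
import scipy.signal

frame_len, hop, n_frames = 512, 128, 20
frames = [np.random.randn(frame_len) for _ in range(n_frames)]
window = scipy.signal.windows.blackman(frame_len)

out = np.zeros(hop * (n_frames - 1) + frame_len)
wsum = np.zeros_like(out)
for k, frame in enumerate(frames):
    out[k * hop:k * hop + frame_len] += frame      # accumulate audio
    wsum[k * hop:k * hop + frame_len] += window    # accumulate window weight

nonzero = wsum > 0
out[nonzero] /= wsum[nonzero]                      # normalize the overlap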
# generate samples, note conversion to float32 array
# (fs, duration, f, f1, f2 and volume are assumed to be defined earlier:
#  sample rate, duration in seconds, the three tone frequencies and a gain)
samples = (np.sin(2 * np.pi * np.arange(fs * duration) * f / fs)).astype(np.float32)
samples1 = (np.sin(2 * np.pi * np.arange(fs * duration) * f1 / fs)).astype(np.float32)
samples2 = (np.sin(2 * np.pi * np.arange(fs * duration) * f2 / fs)).astype(np.float32)

# for paFloat32 sample values must be in range [-1.0, 1.0]
data = volume * (samples + samples1 + samples2)

fig, (time_audiodata_ax, freq_range_ax) = plt.subplots(2, 1)
blk = scipy.blackman(fs)  # window of fs samples, so data must be one second long

time_audiodata_ax.plot(data)
freq_range_ax.set_title("freq")

# Power spectrum of the Blackman-windowed signal; keep only the first half
fft = np.abs(np.fft.fft(data * blk, norm='ortho')) ** 2
fft = fft[:len(fft) // 2]
freq = np.fft.fftfreq(len(data), 1.0 / fs)
freq = freq[:len(freq) // 2]

freq_range_ax.plot(freq, fft)
freqPeak = freq[np.argmax(fft)] + 1
print(freqPeak)
plt.show()
def db(x):
    return 20 * np.log10(x)


def pad(x, p):
    return np.pad(x, (0, p), 'constant', constant_values=0)


def k(M, beta):
    return pad(sc.kaiser(M, beta), p)


# M, beta, p, bins, xaxis_max and rect() are assumed to be defined at module level
hamming = pad(sc.hamming(M), p)
hann = pad(sc.hanning(M), p)
bartlett = pad(sc.bartlett(M), p)
blackman = pad(sc.blackman(M), p)
kaiser = k(M, beta)
rectangular = pad(rect(M), p)

# Index of the last frequency bin below xaxis_max
i = 0
while bins[i] < xaxis_max:
    inter = i
    i += 1

# dB magnitude plot of the window specified above
plt.figure(1)
plt.plot(bins[:inter], db(np.abs(fft(k(M, beta)))[:inter]))
plt.xlabel('Frequency [rad/s]', fontsize=13)
plt.ylabel('Amplitude [dB]', fontsize=13)
plt.axis([0, bins[inter - 1], -100, 0])
import sounddevice as sd
import numpy as np
from numpy.fft import fft, fftfreq, fftshift
import scipy
import matplotlib.pyplot as plt
import datetime
from math import log2, pow

rate = 44100     # sample rate in Hz
duration = 1200  # seconds

plt.ion()
fig, (time_audiodata_ax, freq_range_ax) = plt.subplots(2, 1)
plt.show()

blackman = scipy.blackman(rate)

# Note names relative to C0 (A4 = 440 Hz reference)
A4 = 440
C0 = A4 * pow(2, -4.75)
name = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]


def pitch(freq):
    """Return the note name (e.g. 'A4') closest to the given frequency in Hz."""
    h = round(12 * log2(freq / C0))
    octave = h // 12
    n = h % 12
    return name[n] + str(octave)


with sd.Stream(channels=1, samplerate=rate) as s:
    playback_speed_multiplier = 1
    # ... (remainder of the stream-processing loop is not shown in this fragment)
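# Quick check of the pitch() helper; the expected values follow directly from
# the A4 = 440 Hz reference used above.
print(pitch(440.0))    # -> "A4"
print(pitch(261.63))   # -> "C4" (middle C)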
import numpy as np


def gaussian(length, var=None):
    """Gaussian window; the default var makes the window fall to 0.5 at length/4 from the center."""
    if var is None:
        halfptr = length / 4.
        # solve exp(-(halfptr**2 / (2 * var))) = 0.5 for the variance
        var = -halfptr ** 2 / (2 * np.log(0.5))
    ptr = np.arange((1 - length) / 2., (length + 1) / 2.)
    Y = np.exp(-ptr ** 2 / (2 * var))
    # Y = ((length + 1) / 2.) * (Y / sum(Y))
    return Y


if __name__ == '__main__':
    from pylab import *

    fftLen = 1024
    std = fftLen / 2

    Y1 = gaussian(fftLen)
    Y2 = hanning(fftLen)
    Y3 = hamming(fftLen)
    Y4 = blackman(fftLen)
    Y5 = bartlett(fftLen)
    # Y6 = kaiser(fftLen)

    plot(Y1, label="gaussian")
    plot(Y2, label="hanning")
    plot(Y3, label="hamming")
    plot(Y4, label="blackman")
    plot(Y5, label="bartlett")
    # plot(Y6, label="kaiser")
    ylim(0, 1)
    legend()
    show()
def play1():
    global output1
    global output0

    sd.play(input1, FS)
    sd.wait()

    x0 = np.arange(0, LEN_0, 1)
    x1 = np.arange(0, LEN_1, 1)
    y0 = np.reshape(input0, LEN_0)
    y1 = np.reshape(input1, LEN_1)
    w = blackman(CHUNK)

    # Chunk numbers
    n0 = LEN_0 // CHUNK
    n1 = LEN_1 // CHUNK

    zf0 = np.zeros((n0, HALF_CHUNK))
    zf1 = np.zeros((n1, HALF_CHUNK))
    output0 = np.zeros((n0, HALF_CHUNK))
    output1 = np.zeros((n1, HALF_CHUNK))

    # Spectrum calculation for first record
    for i in range(0, n0):
        zf0[i] = 2.0 / CHUNK * abs(fft(y0[CHUNK * i:CHUNK * (i + 1)] * w)[0:HALF_CHUNK])
        output0[i] = medfilt(zf0[i], MEDFILT_W)
        max0 = np.amax(output0[i])
        for j in range(0, HALF_CHUNK):
            # Spectrum filter and mask
            if (output0[i][j] <= LOGIC_LEVEL * max0) or (np.abs(output0[i][j]) < NOISE_LEVEL):
                output0[i][j] = 0
            else:
                output0[i][j] = 1

    # Spectrum calculation for second record
    for i in range(0, n1):
        zf1[i] = 2.0 / CHUNK * abs(fft(y1[CHUNK * i:CHUNK * (i + 1)] * w)[0:HALF_CHUNK])
        output1[i] = medfilt(zf1[i], MEDFILT_W)
        max1 = np.amax(output1[i])
        for j in range(0, HALF_CHUNK):
            # Spectrum filter and mask
            if (output1[i][j] <= LOGIC_LEVEL * max1) or (np.abs(output1[i][j]) < NOISE_LEVEL):
                output1[i][j] = 0
            else:
                output1[i][j] = 1

    # Spectrum comparison
    n = n1 + 1  # Number of comparisons
    xd = np.arange(n)
    matches = np.zeros(n)
    recognition = np.zeros(n)
    output1 = np.concatenate([output1, np.zeros((n0, HALF_CHUNK))])
    match_output1 = np.zeros((n0, HALF_CHUNK))
    for i in range(0, n):
        window = output1[i:i + n0, :]
        for j in range(0, n0):
            for k in range(0, HALF_CHUNK):
                match_output1[j][k] = window[j][k] and output0[j][k]
        total_harm = np.count_nonzero(output0)
        match_harm = np.count_nonzero(match_output1)
        # Match function
        if total_harm > 0:
            matches[i] = 1 - (total_harm - match_harm) / total_harm
        else:
            matches[i] = 0
        if i > 0:
            if ((matches[i] - matches[i - 1]) <= 0) and (matches[i - 1] > MATCH_LEVEL):
                recognition[i] = recognition[i - 1] = 1

    if np.count_nonzero(matches) == 0:
        l3['text'] = "No matches found"
    else:
        l3['text'] = np.count_nonzero(matches)

    # Input/output plotting
    plt.figure()
    plt.title('Blue - first record, orange - second record, green - match')
    plt.plot(x0 / FS, y0, x1 / FS, y1)  # , xd * n1 / ((n - 1) * N), matches
    plt.grid(True)
    plt.fill_between(xd * n1 / ((n - 1) * N), -1, 1,
                     where=recognition > 0, color='green', alpha=0.75)
    plt.xlabel('Time, s')
    plt.show()