def goMorphing():
    """Return the web-relative path of the morphed sound file.

    On Heroku only the path of a pre-rendered file is built. Otherwise the
    base and main sounds are morphed with ``Morphing.stftMorph``, the result
    is passed through ``IIRFilter.filterProc`` and written back to the same
    path. Returns ``None`` when ``checkSoundFileIsSet()`` reports False.
    """
    global soundMain, soundBase, RATE, N, nHop, fSmooth, fBalance, RECFILEPATH
    sound_reader = Sounds()

    # Bail out early when the sound files have not been selected yet.
    if checkSoundFileIsSet() == False:
        return

    if commUtill.ON_HEROKU:
        commUtill.logger.debug("get morphed file path ON HEROKU")
        served_file = outputPath + 'finalized' + str(nSelectedSoundBase) + '.wav'
    else:
        # One analysis window per input sound.
        window_a = hanning(N)
        window_b = hanning(N)
        served_file = Morphing().stftMorph(soundBase, soundMain, RATE,
                                           window_a, N, window_b, N,
                                           N // nHop, fSmooth, fBalance)
        morphed_samples = sound_reader.getSound(served_file)
        filtered = IIRFilter.filterProc(morphed_samples, RATE, "lowpass",
                                        "butter", 3000.0)
        commUtill.writeSoundFile(served_file, filtered, RATE)

    # Strip the document root so the browser can request the file.
    return served_file.replace(application_path + "/html", '', 1)
def stft(x, fftsize=16, overlap=2):
    """Short-time Fourier transform of a real sequence.

    Args:
        x: 1-D array of samples.
        fftsize: frame length in samples.
        overlap: overlap factor; the hop is ``round(fftsize / overlap)``.

    Returns:
        2-D complex array (frames x ``rfft`` bins). When ``x`` is not longer
        than ``fftsize`` a single frame covering all of ``x`` is returned.

    Raises:
        ValueError: if the resulting hop is smaller than one sample.
    """
    if len(x) <= fftsize:
        # Signal shorter than one frame: window the whole signal instead.
        w = np.hanning(len(x) + 1)[:-1]
        return np.array([np.fft.rfft(w * x[0:len(x)])])

    # range() needs an integer step on every Python version.
    hop = int(round(fftsize / overlap))
    if hop < 1:
        raise ValueError("overlap is too large for fftsize")
    # np.hanning replaces the scipy.hanning alias dropped from SciPy;
    # the +1/[:-1] trick yields a periodic window.
    w = np.hanning(fftsize + 1)[:-1]
    return np.array([
        np.fft.rfft(w * x[i:i + fftsize])
        for i in range(0, len(x) - fftsize, hop)
    ])
def test(filename=None):
    """Plot the LTSD measure of one recording next to its ground truth.

    Args:
        filename: path of the recording; when None a random ``.flac`` file
            from ``tmp/`` is used. The first character of the base name
            selects the entry of ``vad.load_truths()``.
    """
    import os
    import random

    import matplotlib.pyplot as plt

    if filename is None:
        candidates = [
            x for x in os.listdir("tmp/")
            if os.path.splitext(x)[1] == ".flac"
        ]
        filename = random.choice(candidates)
        scene = filename[0]
        filename = "tmp/" + filename
    else:
        scene = os.path.basename(filename)[0]
    print(filename)

    truths = vad.load_truths()
    signal, rate = speech.read_soundfile(filename)
    seconds = float(len(signal)) / rate
    winsize = librosa.time_to_samples(float(WINMS) / 1000, rate)[0]
    # NOTE(review): sp.hanning assumes `sp` still exposes NumPy's hanning.
    window = sp.hanning(winsize)
    ltsd = LTSD(winsize, window, 5)
    res, threshold, nstart, nend = ltsd.compute(signal)
    segments = ltsd.segments(res, threshold)
    segments = librosa.core.frames_to_time(segments, rate, winsize / 2)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(np.linspace(0, seconds, len(res)), res)
    ax.plot([0, seconds], [threshold, threshold])
    vad.plot_segments(truths[scene]['combined'], segments, ax)
    # Mark the sample range reported by compute() as vertical lines.
    n1 = float(nstart) / rate
    n2 = float(nend) / rate
    ax.vlines([n1, n2], -20, 20)
    plt.show()
def smoothDownSampleFeature(dataframe, windowLength, downSampleFactor):
    '''
    Temporal smoothing and downsampling of a feature sequence.

    Adapted from the smoothDownsampleFeature.m file of the Matlab Chroma
    Toolbox at http://resources.mpi-inf.mpg.de/MIR/chromatoolbox/

    dataframe        : pandas DataFrame, one feature vector per row.
    windowLength     : length of the normalised Hann smoothing window.
    downSampleFactor : keep every downSampleFactor-th smoothed row.

    Returns the input unchanged when both parameters are 1, otherwise a new
    DataFrame indexed by every downSampleFactor-th original index label.
    '''
    if windowLength == 1 and downSampleFactor == 1:
        return dataframe

    statWindow = hanning(windowLength)
    statWindow = statWindow / statWindow.sum()
    statWindow = np.tile(statWindow, [1, 1])

    # DataFrame.as_matrix() was removed from pandas; .values is equivalent.
    f_feature = dataframe.values
    seg_num = f_feature.shape[0]
    stat_num = int(ceil(seg_num / downSampleFactor))
    f_feature_stat = upfirdn(f_feature, statWindow.transpose(), 1,
                             downSampleFactor)

    # Slice bounds must be integers; floor() may return a float.
    cut = int(floor((windowLength - 1) / (2 * downSampleFactor)))
    f_feature_stat = f_feature_stat[cut:stat_num + cut, :]

    # Step directly by the factor instead of round-tripping through
    # 1 / (1 / factor), which is exposed to floating-point truncation.
    timeIndex = list(islice(dataframe.index, 0, len(dataframe.index),
                            int(downSampleFactor)))
    return pd.DataFrame(f_feature_stat, index=timeIndex)
def vad_callback(data):
    """Handle one audio message: publish it if it contains activity.

    The chunk in ``data.data`` is analysed with ``LTSD`` and ``fence``.
    When the detected span is longer than half a second of samples the
    chunk is published on ``vad_pub`` (passed through ``cocktail`` with the
    latest background chunk when ``FILTER`` is set and one is available).
    Otherwise, with ``FILTER`` set, the chunk is remembered as background
    noise; the history restarts once it holds more than five chunks.
    """
    global big_data, WINSIZE, vad_pub, stream, SAMPLE_RATE
    global counter, background_noise, FILTER

    signal = np.array(data.data, dtype=np.int16)
    # print() with a single argument behaves the same on Python 2 and 3.
    print("recieved = " + str(len(signal)) + " frames = " +
          str(float(len(signal)) / SAMPLE_RATE) + " seconds")

    # NOTE(review): sp.hanning assumes `sp` still exposes NumPy's hanning.
    window = sp.hanning(WINSIZE)
    ltsd = LTSD(WINSIZE, window, 5)
    res = ltsd.compute(signal)
    start, end = fence(res, len(signal))
    final = np.array(signal[start:end], dtype=np.float32)
    print('start = ' + str(start))
    print('end = ' + str(end))

    if end - start > SAMPLE_RATE / 2:
        print("FOUND ACTIVITY - " + str(max(final)))
        if FILTER and len(background_noise) > 0:
            # Filter against the most recent background chunk.
            f = cocktail(signal, background_noise[-1])
            vad_pub.publish(np.array(f[0], dtype=np.float32))
        else:
            vad_pub.publish(np.array(signal, dtype=np.float32))
    elif FILTER:
        background_noise.append(signal)
        if len(background_noise) > 5:
            # Restart the history with only the newest chunk.
            background_noise = [signal]
def test(filename=None):
    """Plot the LTSD measure of one recording next to its ground truth.

    Args:
        filename: path of the recording; when None a random ``.flac`` file
            from ``tmp/`` is used. The first character of the base name
            selects the entry of ``vad.load_truths()``.
    """
    import os
    import random

    import matplotlib.pyplot as plt

    if filename is None:
        candidates = [
            x for x in os.listdir("tmp/")
            if os.path.splitext(x)[1] == ".flac"
        ]
        filename = random.choice(candidates)
        scene = filename[0]
        filename = "tmp/" + filename
    else:
        scene = os.path.basename(filename)[0]
    print(filename)

    truths = vad.load_truths()
    signal, rate = speech.read_soundfile(filename)
    seconds = float(len(signal)) / rate
    winsize = librosa.time_to_samples(float(WINMS) / 1000, rate)[0]
    # NOTE(review): sp.hanning assumes `sp` still exposes NumPy's hanning.
    window = sp.hanning(winsize)
    ltsd = LTSD(winsize, window, 5)
    res, threshold, nstart, nend = ltsd.compute(signal)
    segments = ltsd.segments(res, threshold)
    segments = librosa.core.frames_to_time(segments, rate, winsize / 2)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(np.linspace(0, seconds, len(res)), res)
    ax.plot([0, seconds], [threshold, threshold])
    vad.plot_segments(truths[scene]['combined'], segments, ax)
    # Mark the sample range reported by compute() as vertical lines.
    n1 = float(nstart) / rate
    n2 = float(nend) / rate
    ax.vlines([n1, n2], -20, 20)
    plt.show()
def stft(data, fs, framesize = 0.075, hopsize = 0.0625):
    """Magnitude STFT of the frames whose mean level passes an energy gate.

    Args:
        data: numpy array containing the signal to be processed.
        fs: sampling frequency of ``data``.
        framesize: frame length in seconds.
        hopsize: hop length in seconds.

    Returns:
        2-D array (kept frames x ``ceil((frame + 1) / 2)`` bins) with the
        non-redundant half of each magnitude spectrum. Frames whose mean
        absolute value is not above 20 % of the signal mean are skipped;
        if none pass, an empty array with zero rows is returned.

    Raises:
        Exception: if ``data`` is not a numpy array or is empty.
    """
    if not isinstance(data, numpy.ndarray):
        raise Exception('data argument is no instance of numpy.array')
    if len(data) < 1:
        raise Exception('data array is empty')

    frameSamp = int(framesize * fs)
    hopSamp = int(hopsize * fs)
    window = numpy.hanning(frameSamp)
    threshold = numpy.mean(numpy.absolute(data)) * 0.20
    # The FFT of real input is Hermitian-symmetric: keep the first half.
    # The bound must be an int to be usable as a slice index.
    n_keep = int(numpy.ceil((frameSamp + 1.0) / 2))

    frames = [
        numpy.absolute(numpy.fft.fft(window * data[i:i + frameSamp]))[:n_keep]
        for i in range(0, len(data) - frameSamp, hopSamp)
        if numpy.mean(numpy.absolute(data[i:i + frameSamp])) > threshold
    ]
    if not frames:
        return numpy.empty((0, n_keep))
    return numpy.array(frames)
def stft(data, fftsize=1024, overlap=4):
    """Short-time Fourier transform with a periodic Hann window.

    Args:
        data: 1-D array of samples.
        fftsize: frame length in samples.
        overlap: overlap factor; the hop is ``fftsize // overlap``.

    Returns:
        2-D complex array (frames x ``fftsize // 2 + 1`` bins).
    """
    # Floor division keeps the range() step an integer.
    hop = fftsize // overlap
    # np.hanning replaces the scipy.hanning alias dropped from SciPy.
    w = np.hanning(fftsize + 1)[:-1]
    return np.array([
        np.fft.rfft(w * data[i:i + fftsize])
        for i in range(0, len(data) - fftsize, hop)
    ])
def stft(x, fftsize, overlap):
    '''Compute the Short Time Fourier Transform with a periodic Hann window.

    x       : 1-D array of samples
    fftsize : frame length in samples
    overlap : overlap factor; the hop is fftsize // overlap

    Returns a 2-D complex array (frames x fftsize // 2 + 1 bins).
    '''
    hop = fftsize // overlap
    # numpy.hanning replaces the scipy.hanning alias dropped from SciPy;
    # the +1/[:-1] trick gives a periodic window for better reconstruction.
    w = numpy.hanning(fftsize + 1)[:-1]
    return numpy.array([
        numpy.fft.rfft(w * x[i:i + fftsize])
        for i in range(0, len(x) - fftsize, hop)
    ])
def stft(data, fs, framesize=0.075, hopsize=0.0625):
    """Magnitude STFT of the frames whose mean level passes an energy gate.

    Args:
        data: numpy array containing the signal to be processed.
        fs: sampling frequency of ``data``.
        framesize: frame length in seconds.
        hopsize: hop length in seconds.

    Returns:
        2-D array (kept frames x ``ceil((frame + 1) / 2)`` bins) with the
        non-redundant half of each magnitude spectrum. Frames whose mean
        absolute value is not above 20 % of the signal mean are skipped;
        if none pass, an empty array with zero rows is returned.

    Raises:
        Exception: if ``data`` is not a numpy array or is empty.
    """
    if not isinstance(data, numpy.ndarray):
        raise Exception('data argument is no instance of numpy.array')
    if len(data) < 1:
        raise Exception('data array is empty')

    frameSamp = int(framesize * fs)
    hopSamp = int(hopsize * fs)
    window = numpy.hanning(frameSamp)
    threshold = numpy.mean(numpy.absolute(data)) * 0.20
    # The FFT of real input is Hermitian-symmetric: keep the first half.
    # The bound must be an int to be usable as a slice index.
    n_keep = int(numpy.ceil((frameSamp + 1.0) / 2))

    frames = [
        numpy.absolute(numpy.fft.fft(window * data[i:i + frameSamp]))[:n_keep]
        for i in range(0, len(data) - frameSamp, hopSamp)
        if numpy.mean(numpy.absolute(data[i:i + frameSamp])) > threshold
    ]
    if not frames:
        return numpy.empty((0, n_keep))
    return numpy.array(frames)
def testMainSingle(self, verbose=VERBOSE.PLOT): import time # setup V = VERBOSE(verbose) TF = 21 NC = 2 spike_proto_sc = sp.cos(sp.linspace(-sp.pi, 3 * sp.pi, TF)) spike_proto_sc *= sp.hanning(TF) scale = sp.linspace(0, 2, TF) xi1 = sp.vstack( (spike_proto_sc * 5 * scale, spike_proto_sc * 4 * scale)).T xi2 = sp.vstack((spike_proto_sc * .5 * scale[::-1], spike_proto_sc * 9 * scale[::-1])).T templates = sp.asarray([xi1, xi2]) LEN = 2000 noise = sp.randn(LEN, NC) ce = TimeSeriesCovE(tf_max=TF, nc=NC) ce.update(noise) FB = BOTMNode(templates=templates, ce=ce, verbose=V, ovlp_taus=None) signal = sp.zeros_like(noise) NPOS = 4 POS = [(int(i * LEN / (NPOS + 1)), 100) for i in xrange(1, NPOS + 1)] POS.append((100, 2)) POS.append((150, 2)) for pos, tau in POS: signal[pos:pos + TF] += xi1 signal[pos + tau:pos + tau + TF] += xi2 x = sp.ascontiguousarray(signal + noise, dtype=sp.float32) # test against if V.has_print: print '### constructed spike times ###' test_u0 = sorted([t_tpl[0] for t_tpl in POS]) test_u1 = sorted([t_tpl[0] + t_tpl[1] for t_tpl in POS]) test_rval = { 0: sp.array(test_u0) + TF / 2, 1: sp.array(test_u1) + TF / 2 } if V.has_print: print test_rval # sort tic_o = time.clock() FB(x) toc_o = time.clock() if V.has_print: print '### sorting spike times ###' print FB.rval if V.has_plot: FB.plot_template_set(show=False) FB.plot_sorting(show=True) if V.has_print: print '###' print 'duration:', toc_o - tic_o for k in FB.rval: assert_array_almost_equal(FB.rval[k], test_rval[k], decimal=0)
def stft(x, width):
    """Short time fourier transform of a real sequence.

    A Hanning window of ``width`` samples slides over ``x`` in steps of
    ``width // 2`` and each windowed frame is transformed with
    ``scipy.fftpack.rfft``. The result is scaled by ``2 / width``.

    Parameters
    ----------
    x : ndarray
    width: int
        the width of the sliding window in samples

    Returns
    -------
    fourier : 2d real array
        the dimensions are time, frequency; each row uses the packed real
        layout produced by ``scipy.fftpack.rfft``

    See Also
    --------
    spectrum, spectrogram, numpy.hanning, scipy.fftpack.rfft
    """
    # np.hanning replaces the scipy.hanning alias dropped from SciPy.
    window = np.hanning(width)
    fourier = np.array([sp.fftpack.rfft(x[i:i+width] * window)
                        for i in range(0, len(x)-width, width//2)])
    # Float literal: with integer division 2 / width would be 0.
    fourier *= (2.0 / width)
    return fourier
def analyze_whole_waveform(waveform):
    """Analyse a waveform in overlapping Hann-windowed segments.

    The window size is ``framerate / 2 / desired_precision`` with a desired
    precision of 10 Hz (about 400 frames at 8 kHz, 2205 frames at 44.1 kHz).
    Segments start every ``window_size / 2 - 1`` frames; a segment running
    past the end of the stream is zero padded.

    Returns an OrderedDict mapping each segment's start frame to the result
    of ``analyze_window`` for that segment.
    """
    desired_precision = 10  # Hz
    window_size = int(waveform.framerate / 2 / desired_precision)
    taper = hanning(window_size)
    taper_len = len(taper)
    hop = int((taper_len / 2) - 1)
    total_frames = len(waveform.frames)

    spectrum = OrderedDict()
    for start_frame in range(0, total_frames, hop):
        segment = zeros(taper_len)
        # Only the frames that exist are filled; the rest stay zero.
        usable = min(taper_len, total_frames - start_frame)
        for offset in range(usable):
            segment[offset] = (taper[offset] *
                               waveform.frames[start_frame + offset])
        spectrum[start_frame] = analyze_window(Waveform(segment))
    return spectrum
def fft(self, window="hanning", nfft=None):
    """Return the one-sided spectrum of this signal as a SpectrumData.

    Args:
        window: "hamming", "hanning" or "square" (no window).
        nfft: FFT length; defaults to the number of samples.

    Returns:
        SpectrumData holding the first ``nfft // 2`` FFT bins and their
        frequencies, with the same name and sampling rate as this object.

    Raises:
        ValueError: if ``window`` is not one of the supported names.
    """
    # numpy.fft.fftpack and the scipy window aliases no longer exist;
    # import the same functions from their current NumPy locations.
    from numpy import hamming, hanning
    from numpy.fft import fft as npfft
    from numpy.fft import fftfreq as npfftfreq

    sig = self.get_data()
    n = sig.shape[0]
    if window == "hamming":
        win = hamming(n)
    elif window == "hanning":
        win = hanning(n)
    elif window == "square":
        win = 1
    else:
        # ValueError is a StandardError subclass on Python 2, so handlers
        # written for the old exception type still match.
        raise ValueError("Windows is not %s" % (window,))

    # FFT, including the mirrored (negative-frequency) half
    if nfft is None:
        nfft = n
    spec = npfft(sig * win, n=nfft)

    # Frequencies, including the mirrored half
    freq = npfftfreq(nfft, d=1. / self.get_fs())

    # Drop the mirrored half before returning; the bound must be an int.
    se = nfft // 2
    spectrum = SpectrumData(data=spec[:se], xdata=freq[:se], name=self.name)
    spectrum.set_fs(self.get_fs())
    return spectrum
def concentate_samples_with_windowing(wav_arrs, hop_size):
    """
    Concatenates a number of samples into one wav array, tapering each one
    with a Hann window.

    Each sample is lengthened by ``2 * hop_size`` with ``time_shift``,
    windowed, and its central part (the margins trimmed again) is written
    after the previous sample.

    :param wav_arrs: The array of samples to be concatenated
    :param hop_size: The margin, in samples, added on each side before
        windowing
    :return: A concatenated wav array, numpy array whose length is the sum
        of the input lengths
    """
    # reduce() without an initial value started from the first array
    # itself; sum the lengths explicitly.
    length = sum(len(arr) for arr in wav_arrs)
    out_wav = zeros(length)

    curr_length = 0
    for arr in wav_arrs:
        if curr_length >= length:
            break
        stretched = time_shift(arr, len(arr) + 2 * hop_size)
        sample_length = len(stretched) - 2 * hop_size
        windowed_sample = hanning(len(stretched)) * stretched
        # Slice (not tuple-index) the central part of the windowed sample.
        core = windowed_sample[hop_size:hop_size + sample_length]
        # Clip so a sample that came back longer cannot overrun the output.
        end = min(curr_length + sample_length, length)
        out_wav[curr_length:end] += core[:end - curr_length]
        curr_length = end
    return out_wav
def test_hanning():
    """Check that the scipy hanning window matches the Matlab one.

    Matlab returns an N+2 point window without its first and last samples,
    so the scipy window is built with two extra points and trimmed.
    """
    trimmed_window = scipy.hanning(N_FRAME + 2)[1:-1]
    matlab_window = np.array(eng.hanning(float(N_FRAME))._data)
    assert_allclose(trimmed_window, matlab_window, atol=ATOL)
def stft(x, chunk_size, hop, w=None):
    """
    Takes the short time fourier transform of x.

    Args:
        x: samples to window and transform.
        chunk_size: size of analysis window.
        hop: hop distance between analysis windows
        w: windowing function to apply. Must be of length chunk_size;
            defaults to a Hann window.

    Returns:
        STFT of x (X(t, omega)) hop size apart with windows of size
        chunk_size, divided by sqrt((chunk_size / hop) / 2).

    Raises:
        ValueError if window w is not of size chunk_size
    """
    # `not w` raises for an ndarray window; test for None explicitly.
    if w is None:
        w = np.hanning(chunk_size)
    elif len(w) != chunk_size:
        raise ValueError("window w is not of the correct length {0}.".format(chunk_size))
    # Float arithmetic so integer arguments are not truncated.
    scale = np.sqrt((float(chunk_size) / float(hop)) / 2.0)
    # np.array / np.fft.fft replace the aliases dropped from SciPy's root.
    X = np.array([np.fft.fft(w * x[i:i + chunk_size])
                  for i in range(0, len(x) - chunk_size, hop)]) / scale
    return X
def Window1(WindowSize=10, WindowType='hanning'):
    '''
    Window function

    <<Input>>
    WindowSize ... number of samples in the window
    WindowType ... window type (case-sensitive)
                   * 'hanning'   : periodic Hanning window
                   * 'hanning2'  : Hanning window (pi delayed)
                   * 'hamming'   : periodic Hamming window
                   * 'rectangle' : rectangular window

    <<Output>>
    window ... the window as a 1-D array. An unsupported type prints a
               notice and falls back to numpy's symmetric Hanning window.
    '''
    # Compare strings with ==; `is` tests object identity and only works
    # by accident of interning.
    if WindowType == 'hanning':
        t = np.arange(WindowSize, dtype=np.float64)
        window = 0.5 - 0.5 * np.cos(2 * np.pi * t / WindowSize)
    elif WindowType == 'hanning2':
        t = np.arange(WindowSize, dtype=np.float64)
        window = 0.5 - 0.5 * np.cos(2 * np.pi * t / WindowSize + np.pi)
    elif WindowType == 'hamming':
        t = np.arange(WindowSize, dtype=np.float64)
        window = 0.54 - 0.46 * np.cos(2 * np.pi * t / WindowSize)
    elif WindowType == 'rectangle':
        window = np.ones(WindowSize)
    else:
        print(WindowType + " is not supported window type.")
        print("hanning window is used.")
        # np.hanning replaces the alias dropped from SciPy's root.
        window = np.hanning(WindowSize)
    return window
def stft(x, fs, framesz, hop):
    """Short-time Fourier transform with a Hann window.

    Args:
        x: 1-D array of samples.
        fs: sampling frequency of ``x``.
        framesz: frame length in seconds.
        hop: hop length in seconds.

    Returns:
        2-D complex array (frames x frame length in samples).
    """
    # scipy.hanning / scipy.array / scipy.fft were re-exports of NumPy
    # functions and are gone from SciPy's root; call NumPy directly.
    import numpy as np

    framesamp = int(framesz*fs)
    hopsamp = int(hop*fs)
    w = np.hanning(framesamp)
    X = np.array([np.fft.fft(w*x[i:i+framesamp])
                  for i in range(0, len(x)-framesamp, hopsamp)])
    return X
def __init__(self, hp): super(WaveEncoder, self).__init__() ## frond-end part self.epsilon = 1e-8 # Like preemphasis filter self.preemp = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=2, stride=1, padding=0, bias=False) # init tmp = torch.zeros((1,1,2)).to(DEVICE) tmp.data[:,:,0] = -0.97 tmp.data[:,:,1] = 1 self.preemp.weight.data = torch.tensor(tmp) # if 16kHz self.comp = nn.Conv1d(in_channels=1, out_channels=80, kernel_size=400, stride=1, padding=0, bias=False) nn.init.kaiming_normal_(self.comp.weight.data) # B x 400 (0.01s = 10ms) tmp = np.zeros((40, 1, 400)) tmp[:, :] = scipy.hanning(400 + 1)[:-1] tmp = tmp * tmp K = torch.tensor(tmp, dtype=torch.float).to(DEVICE) self.lowpass_weight = K self.instancenorm = nn.InstanceNorm1d(40) # encoder part if hp.frame_stacking: input_size = hp.lmfb_dim * hp.frame_stacking else: input_size = hp.lmfb_dim self.bi_lstm = nn.LSTM(input_size=input_size, hidden_size=hp.num_hidden_nodes, num_layers=hp.num_encoder_layer, \ batch_first=True, dropout=hp.encoder_dropout, bidirectional=True)
def fft(self, window="hanning", nfft=None):
    """Return the one-sided spectrum of this signal as a SpectrumData.

    Args:
        window: "hamming", "hanning" or "square" (no window).
        nfft: FFT length; defaults to the number of samples.

    Returns:
        SpectrumData holding the first ``nfft // 2`` FFT bins and their
        frequencies, with the same name and sampling rate as this object.

    Raises:
        ValueError: if ``window`` is not one of the supported names.
    """
    # numpy.fft.fftpack and the scipy window aliases no longer exist;
    # import the same functions from their current NumPy locations.
    from numpy import hamming, hanning
    from numpy.fft import fft as npfft
    from numpy.fft import fftfreq as npfftfreq

    sig = self.get_data()
    n = sig.shape[0]
    if window == "hamming":
        win = hamming(n)
    elif window == "hanning":
        win = hanning(n)
    elif window == "square":
        win = 1
    else:
        # ValueError is a StandardError subclass on Python 2, so handlers
        # written for the old exception type still match.
        raise ValueError("Windows is not %s" % (window, ))

    # FFT, including the mirrored (negative-frequency) half
    if nfft is None:
        nfft = n
    spec = npfft(sig * win, n=nfft)

    # Frequencies, including the mirrored half
    freq = npfftfreq(nfft, d=1. / self.get_fs())

    # Drop the mirrored half before returning; the bound must be an int.
    se = nfft // 2
    spectrum = SpectrumData(data=spec[:se], xdata=freq[:se], name=self.name)
    spectrum.set_fs(self.get_fs())
    return spectrum
def stft(x, chunk_size, overlap=1):
    """Short-time Fourier transform with a periodic Hann window.

    Args:
        x: 1-D array of samples.
        chunk_size: frame length in samples.
        overlap: overlap factor; the hop is ``chunk_size // overlap``.

    Returns:
        2-D complex array (frames x ``chunk_size // 2 + 1`` bins).
    """
    # Floor division keeps the range() step an integer.
    hop = chunk_size // overlap
    # np.hanning replaces the scipy.hanning alias dropped from SciPy.
    w = np.hanning(chunk_size + 1)[:-1]
    return np.array([np.fft.rfft(w * x[i:i + chunk_size])
                     for i in range(0, len(x) - chunk_size, hop)])
def stft(x, fs, framesz, hop):
    """Short-time Fourier transform with a Hann window.

    Args:
        x: 1-D array of samples.
        fs: sampling frequency of ``x``.
        framesz: frame length in seconds (e.g. 0.05 for 50 ms).
        hop: hop length in seconds (e.g. 0.025 for 25 ms).

    Returns:
        2-D complex array (frames x frame length in samples).
    """
    # scipy.hanning / scipy.array / scipy.fft were re-exports of NumPy
    # functions and are gone from SciPy's root; call NumPy directly.
    import numpy as np

    framesamp = int(framesz*fs)
    hopsamp = int(hop*fs)
    w = np.hanning(framesamp)
    X = np.array([np.fft.fft(w*x[i:i+framesamp])
                  for i in range(0, len(x)-framesamp, hopsamp)])
    return X
def stft(x, fftsize=1024, overlap=4):
    """Short-time Fourier transform with a periodic Hann window.

    fftsize is in samples; the hop is ``fftsize // overlap``.
    Returns a 2-D complex array (frames x ``fftsize // 2 + 1`` bins).
    """
    # Floor division keeps the range() step an integer.
    hop = fftsize // overlap
    # np.hanning replaces the scipy.hanning alias dropped from SciPy;
    # better reconstruction with the +1)[:-1] trick (periodic window).
    w = np.hanning(fftsize + 1)[:-1]
    return np.array([np.fft.rfft(w * x[i:i + fftsize])
                     for i in range(0, len(x) - fftsize, hop)])
def istft(X, chunk_size, hop, w=None):
    """
    Naively inverts the short time fourier transform using an overlap and
    add method. The overlap is defined by hop

    Args:
        X: STFT windows to invert, overlap and add.
        chunk_size: size of analysis window.
        hop: hop distance between analysis windows
        w: windowing function to apply. Must be of length chunk_size;
            defaults to a Hann window.

    Returns:
        ISTFT of X using an overlap and add method, as an array of
        ``len(X) * hop`` samples. Windowing used to smooth.

    Raises:
        ValueError if window w is not of size chunk_size
    """
    # The root-level SciPy aliases used before are gone; use NumPy.
    import numpy as np

    # `not w` raises for an ndarray window; test for None explicitly.
    if w is None:
        w = np.hanning(chunk_size)
    elif len(w) != chunk_size:
        raise ValueError("window w is not of the correct length {0}.".format(chunk_size))

    x = np.zeros(len(X) * (hop))
    for n, i in enumerate(range(0, len(x)-chunk_size, hop)):
        x[i:i+chunk_size] += w*np.real(np.fft.ifft(X[n]))
    return x
def stft(x, fs, framesz = .05, hop = .025):
    """Real part of the short-time Fourier transform (Hann window).

    Args:
        x: 1-D array of samples.
        fs: sampling frequency of ``x``.
        framesz: frame length in seconds.
        hop: hop length in seconds.

    Returns:
        2-D real array (frames x ``rfft`` bins); the imaginary part of
        each spectrum is discarded.
    """
    framesamp = int(framesz*fs)
    hopsamp = int(hop*fs)
    # np.hanning / np.array replace the aliases dropped from SciPy's root.
    w = np.hanning(framesamp)
    X = np.array([np.fft.rfft(w*x[i:i+framesamp])
                  for i in range(0, len(x)-framesamp, hopsamp)])
    return np.real(X)
def mmse_stsa(infile, outfile, noise_sum):
    """Apply a frame-wise spectral gain to ``infile`` and write ``outfile``.

    Each frame is Hann-windowed and transformed; a gain computed from the
    frame power, the noise estimate and the previous frame's state scales
    the magnitudes, and the frame is resynthesised with its original phase.

    NOTE(review): the name suggests the MMSE short-time spectral amplitude
    estimator - confirm against the reference formula. Python 2 code
    (xrange, print statement, integer division).
    """
    signal, params = read_signal(infile, WINSIZE)
    # Frame count for half-window hops.
    nf = len(signal)/(WINSIZE/2) - 1
    sig_out=sp.zeros(len(signal),sp.float32)

    G = sp.ones(WINSIZE)  # gain of the previous frame
    prevGamma = G  # value of gamma from the previous frame
    alpha = 0.98  # weight of the previous-frame term in xi
    window = sp.hanning(WINSIZE)
    gamma15=spc.gamma(1.5)
    # presumably averages a sum over five noise frames - verify with caller
    lambdaD = noise_sum / 5.0
    percentage = 0
    for no in xrange(nf):
        # Print progress whenever the integer percentage increases.
        p = int(math.floor(1. * no / nf * 100))
        if (p > percentage):
            percentage = p
            print "{}%".format(p),

        y = get_frame(signal, WINSIZE, no)
        Y = sp.fft(y*window)
        Yr = sp.absolute(Y)  # magnitude
        Yp = sp.angle(Y)  # phase, reused unchanged for resynthesis
        gamma = Yr**2/lambdaD
        # Blend the previous frame's estimate with the current excess.
        xi = alpha * G**2 * prevGamma + (1-alpha)*sp.maximum(gamma-1, 0)
        prevGamma = gamma
        nu = gamma * xi / (1+xi)
        G = (gamma15 * sp.sqrt(nu) / gamma ) * sp.exp(-nu/2) * ((1+nu)*spc.i0(nu/2)+nu*spc.i1(nu/2))
        # Where the gain is NaN or infinite, use xi / (xi + 1) instead.
        idx = sp.isnan(G) + sp.isinf(G)
        G[idx] = xi[idx] / (xi[idx] + 1)
        Yr = G * Yr
        Y = Yr * sp.exp(Yp*1j)
        y_o = sp.real(sp.ifft(Y))
        add_signal(sig_out, y_o, WINSIZE, no)

    write_signal(outfile, params, sig_out)
def stft(x, chunk_size, hop, w=None):
    """
    Takes the short time fourier transform of x.

    Args:
        x: samples to window and transform.
        chunk_size: size of analysis window.
        hop: hop distance between analysis windows
        w: windowing function to apply. Must be of length chunk_size;
            defaults to a Hann window.

    Returns:
        STFT of x (X(t, omega)) hop size apart with windows of size
        chunk_size, divided by sqrt((chunk_size / hop) / 2).

    Raises:
        ValueError if window w is not of size chunk_size
    """
    # `not w` raises for an ndarray window; test for None explicitly.
    if w is None:
        w = np.hanning(chunk_size)
    elif len(w) != chunk_size:
        raise ValueError(
            "window w is not of the correct length {0}.".format(
                chunk_size))
    scale = np.sqrt(((float(chunk_size) / float(hop)) / 2.0))
    # np.array / np.fft.fft replace the aliases dropped from SciPy's root.
    X = np.array([
        np.fft.fft(w * x[i:i + chunk_size])
        for i in range(0, len(x) - chunk_size, hop)
    ]) / scale
    return X
def stft(x, fftsize=64, overlap_pct=.5):
    """Short-time Fourier transform without the last (Nyquist) bin.

    Args:
        x: 1-D array of samples.
        fftsize: frame length in samples.
        overlap_pct: fraction of each frame shared with the next one.

    Returns:
        2-D complex array (frames x ``fftsize // 2`` bins); it has zero
        rows when ``x`` is too short for a single frame.

    Raises:
        ValueError: if the overlap leaves a hop smaller than one sample.
    """
    hop = int(fftsize * (1 - overlap_pct))
    if hop < 1:
        raise ValueError("overlap_pct leaves no hop between frames")
    # np.hanning replaces the scipy.hanning alias dropped from SciPy.
    w = np.hanning(fftsize + 1)[:-1]
    frames = [np.fft.rfft(w * x[i:i + fftsize])
              for i in range(0, len(x) - fftsize, hop)]
    if not frames:
        # An empty list gives a 1-D array that cannot be column-sliced.
        return np.empty((0, fftsize // 2), dtype=complex)
    raw = np.array(frames)
    return raw[:, :(fftsize // 2)]


import matplotlib.pyplot as plt
def istft(X, chunk_size, hop, w=None):
    """
    Naively inverts the short time fourier transform using an overlap and
    add method. The overlap is defined by hop

    Args:
        X: STFT windows to invert, overlap and add.
        chunk_size: size of analysis window.
        hop: hop distance between analysis windows
        w: windowing function to apply. Must be of length chunk_size;
            defaults to a Hann window.

    Returns:
        ISTFT of X using an overlap and add method, as an array of
        ``len(X) * hop`` samples. Windowing used to smooth.

    Raises:
        ValueError if window w is not of size chunk_size
    """
    # The root-level SciPy aliases used before are gone; use NumPy.
    import numpy as np

    # `not w` raises for an ndarray window; test for None explicitly.
    if w is None:
        w = np.hanning(chunk_size)
    elif len(w) != chunk_size:
        raise ValueError(
            "window w is not of the correct length {0}.".format(
                chunk_size))

    x = np.zeros(len(X) * (hop))
    for n, i in enumerate(range(0, len(x) - chunk_size, hop)):
        x[i:i + chunk_size] += w * np.real(np.fft.ifft(X[n]))
    return x
def stft(x, fftsize, overlap, beta):
    """STFT plus time and frequency axes scaled to the globals tid / bins.

    Args:
        x: 1-D array of samples.
        fftsize: frame length in samples.
        overlap: overlap factor; the hop is ``fftsize // overlap``.
        beta: 0 selects a Blackman window, any other value a Hann window.

    Returns:
        ``(X, t, f)``: the complex STFT (frames x bins), a linear axis from
        0 to ``tid[-1]`` with one entry per frame, and a linear axis from 0
        to ``bins[-1]`` with one entry per bin.
    """
    # Floor division keeps the range() step an integer.
    hop = fftsize // overlap
    # The two original branches differed only in the window function.
    # NumPy's functions replace the aliases dropped from SciPy's root.
    window_fn = np.blackman if beta == 0 else np.hanning
    # better reconstruction with the +1)[:-1] trick (periodic window)
    w = window_fn(fftsize + 1)[:-1]
    X = np.array([
        np.fft.rfft(w * x[i:i + fftsize])
        for i in range(0, len(x) - fftsize, hop)
    ])
    freq_axis = np.linspace(0, bins[-1], np.shape(X)[1])
    time_axis = np.linspace(0, tid[-1], np.shape(X)[0])
    return X, time_axis, freq_axis
def timescale(data, scaling=1):
    """Scale the playback duration of ``data`` while keeping pitch constant.

    Uses the module-level frame length ``N`` and hop ``H``. The input is
    read in steps of ``H * scaling`` and each resynthesised frame is
    overlap-added at ``index / scaling`` in the output.

    Args:
        data: 1-D array of samples.
        scaling: read-step multiplier; values above 1 shorten the output.

    Returns:
        int16 array rescaled so its peak equals ``max(data)``.
    """
    # The root-level SciPy aliases used before are gone; use NumPy.
    import numpy as np

    length = len(data)
    phi = np.zeros(N)
    out = np.zeros(N, dtype=complex)
    # Array sizes must be integers.
    sigout = np.zeros(int(length / scaling) + N)

    amplitude = max(data)
    window = np.hanning(N)

    for index in np.arange(0, length - (N + H), H * scaling):
        # Slice indices must be integers even when scaling is fractional.
        start = int(index)
        spec1 = np.fft.fft(window * data[start:start + N])
        spec2 = np.fft.fft(window * data[start + H:start + N + H])

        # Accumulate the phase advance between the two frames.
        phi += np.angle(spec2 / spec1)
        phi %= 2 * np.pi

        out.real, out.imag = np.cos(phi), np.sin(phi)

        out_index = int(index / scaling)
        sigout[out_index:out_index + N] += (
            window * np.fft.ifft(np.absolute(spec2) * out)).real

    sigout *= amplitude / max(sigout)
    return np.array(sigout, dtype='int16')
def tfplots(data, Fs = 44100, color = 'b', fract=3):
    """Plot the fractional-octave smoothed magnitude response of ``data``.

    Args:
        data: 1-D array (e.g. an impulse response).
        Fs: sampling frequency in Hz.
        color: matplotlib colour / format string for the curve.
        fract: smoothing width as a fraction of an octave (3 = 1/3).
    """
    octbin = 100.
    FFTSIZE = 2**18
    half = FFTSIZE // 2

    logfact = 2**(1./octbin)
    # Sizes and indices below must be integers.
    LOGN = int(np.floor(np.log(Fs/2.0)/np.log(logfact)))
    # logarithmic scale from 1 Hz to Fs/2
    logscale = np.power(logfact, np.r_[:LOGN])

    # creating a half hanning window (the decaying half)
    WL = data.size
    hann = np.hanning(WL*2)
    endwin = hann[WL:2*WL]
    tf = fft(data*endwin, FFTSIZE)

    magn = np.abs(tf[:half])

    # creating 100th octave resolution log. spaced data from the lin.
    # spaced FFT data
    logmagn = np.empty(LOGN)
    fstep = Fs/np.float64(FFTSIZE)
    for k in range(logscale.size):
        start = np.round(logscale[k]/np.sqrt(logfact)/fstep)
        start = int(min(max(start, 1), half))
        stop = np.round(logscale[k]*np.sqrt(logfact)/fstep)
        stop = int(min(max(stop, 1), half))
        # averaging the power
        logmagn[k] = np.sqrt(np.mean(np.power(magn[start-1:stop], 2)))

    # creating hanning window for fractional octave smoothing
    HL = int(2 * np.round(octbin/fract))
    hh = np.hanning(HL)

    L = logmagn.size
    # NOTE(review): this only zeroes the last element (the slice is clipped
    # at the array end) - confirm that padding was not intended.
    logmagn[L-1:L+HL] = 0

    # Smoothing the log. spaced data by convolving with the hanning window
    tmp = fftfilt(hh, np.power(logmagn, 2))
    smoothmagn = np.sqrt(tmp[HL//2:HL//2+L]/hh.sum(axis=0))

    # plotting
    plt.semilogx(logscale, 20*np.log10(smoothmagn), color)
def tfplots(data, Fs=44100, color='b', fract=3):
    """Plot the fractional-octave smoothed magnitude response of ``data``.

    Args:
        data: 1-D array (e.g. an impulse response).
        Fs: sampling frequency in Hz.
        color: matplotlib colour / format string for the curve.
        fract: smoothing width as a fraction of an octave (3 = 1/3).
    """
    octbin = 100.
    FFTSIZE = 2**18
    half = FFTSIZE // 2

    logfact = 2**(1. / octbin)
    # Sizes and indices below must be integers.
    LOGN = int(np.floor(np.log(Fs / 2.0) / np.log(logfact)))
    # logarithmic scale from 1 Hz to Fs/2
    logscale = np.power(logfact, np.r_[:LOGN])

    # creating a half hanning window (the decaying half)
    WL = data.size
    hann = np.hanning(WL * 2)
    endwin = hann[WL:2 * WL]
    tf = fft(data * endwin, FFTSIZE)

    magn = np.abs(tf[:half])

    # creating 100th octave resolution log. spaced data from the lin.
    # spaced FFT data
    logmagn = np.empty(LOGN)
    fstep = Fs / np.float64(FFTSIZE)
    for k in range(logscale.size):
        start = np.round(logscale[k] / np.sqrt(logfact) / fstep)
        start = int(min(max(start, 1), half))
        stop = np.round(logscale[k] * np.sqrt(logfact) / fstep)
        stop = int(min(max(stop, 1), half))
        # averaging the power
        logmagn[k] = np.sqrt(np.mean(np.power(magn[start - 1:stop], 2)))

    # creating hanning window for fractional octave smoothing
    HL = int(2 * np.round(octbin / fract))
    hh = np.hanning(HL)

    L = logmagn.size
    # NOTE(review): this only zeroes the last element (the slice is clipped
    # at the array end) - confirm that padding was not intended.
    logmagn[L - 1:L + HL] = 0

    # Smoothing the log. spaced data by convolving with the hanning window
    tmp = fftfilt(hh, np.power(logmagn, 2))
    smoothmagn = np.sqrt(tmp[HL // 2:HL // 2 + L] / hh.sum(axis=0))

    # plotting
    plt.semilogx(logscale, 20 * np.log10(smoothmagn), color)
def stft(x):
    """Short-time Fourier transform using the globals chunk and hop_in.

    Each frame of ``chunk`` samples, taken every ``hop_in`` samples, is
    Hann-windowed and transformed with a full complex FFT.

    Returns a 2-D complex array (frames x ``chunk`` bins).
    """
    # np.hanning replaces the hanning alias dropped from SciPy's root.
    h = np.hanning(chunk)
    X = np.array([
        np.fft.fft(h * x[i:i + chunk])
        for i in range(0, len(x) - chunk, hop_in)
    ])
    return X
def istft(X, fs, T, hop):
    """Overlap-add inverse of a full-FFT STFT (no synthesis window).

    Args:
        X: 2-D array (frames x frame length) of complex spectra.
        fs: sampling frequency.
        T: duration of the output signal in seconds.
        hop: hop length in seconds.

    Returns:
        Array of ``int(T * fs)`` samples with the real part of each
        inverse-transformed frame added at its hop position.
    """
    # The root-level SciPy aliases used before are gone; use NumPy.
    import numpy as np

    x = np.zeros(int(T * fs))
    framesamp = X.shape[1]
    hopsamp = int(hop * fs)
    # The original built a Hann window here but never applied it; frames
    # are still added unwindowed.
    for n, i in enumerate(range(0, len(x) - framesamp, hopsamp)):
        x[i:i + framesamp] += np.real(np.fft.ifft(X[n]))
    return x
def stft(x, fftsize=1024, overlap=4):
    """Short-time Fourier transform with a periodic Hann window.

    Args:
        x: 1-D array of samples.
        fftsize: frame length in samples.
        overlap: overlap factor; the hop is ``fftsize // overlap``.

    Returns:
        2-D complex array (frames x ``fftsize // 2 + 1`` bins).
    """
    # Floor division keeps the range() step an integer.
    hop = fftsize // overlap
    # np.hanning replaces the scipy.hanning alias dropped from SciPy.
    w = np.hanning(fftsize + 1)[:-1]
    return np.array([
        np.fft.rfft(w * x[i:i + fftsize])
        for i in range(0, len(x) - fftsize, hop)
    ])
def stft(x, fftsize=1024, overlap=4):
    """Per-frame peak-normalised power spectrogram.

    Args:
        x: 1-D array of samples.
        fftsize: frame length in samples.
        overlap: overlap factor; the hop is ``fftsize // overlap``.

    Returns:
        2-D real array (frames x ``fftsize // 2 + 1`` bins) in which every
        row is divided by its own maximum power.
    """
    # Floor division keeps the range() step an integer.
    hop = fftsize // overlap
    # np.hanning replaces the scipy.hanning alias dropped from SciPy;
    # better reconstruction with the +1)[:-1] trick (periodic window).
    w = np.hanning(fftsize+1)[:-1]
    l = []
    for i in range(0, len(x)-fftsize, hop):
        power = np.abs(np.fft.rfft(w*x[i:i+fftsize]))**2
        l.append(power/np.max(power))
    return np.array(l)
def test_hanning():
    """Check that the scipy hanning window matches the Matlab one.

    Matlab returns an N+2 point window without its first and last samples;
    the Octave function ``octave/ml_hanning.m`` is called as the reference
    and the scipy window is built with two extra points and trimmed.
    """
    trimmed_window = scipy.hanning(N_FRAME+2)[1:-1]
    octave_window = np.squeeze(octave.feval('octave/ml_hanning.m', N_FRAME))
    assert_allclose(trimmed_window, octave_window, atol=ATOL)
def signal_fft(signal, N):
    """Windowed FFT of ``signal`` plus its half spectrum in dB.

    The signal length and the window length ``N`` are expected to be the
    same number of samples.

    Returns:
        ``(spectrum, half_spectrum_dBV)``: the complex FFT of the
        Hann-windowed signal, and ``20 * log10`` of the magnitudes of the
        first ``int(N / 2)`` bins with the DC bin halved first.
    """
    spectrum = fft(signal * hanning(N))
    magnitude = np.abs(spectrum)
    half_spectrum = magnitude[:int(N / 2)]
    # The DC component does not need the factor of two.
    half_spectrum[0] = half_spectrum[0] / 2
    return spectrum, 20 * np.log10(half_spectrum)
def stft(x, fftsize=1024, overlap=4):
    """Per-frame peak-normalised power spectrogram.

    Args:
        x: 1-D array of samples.
        fftsize: frame length in samples.
        overlap: overlap factor; the hop is ``fftsize // overlap``.

    Returns:
        2-D real array (frames x ``fftsize // 2 + 1`` bins) in which every
        row is divided by its own maximum power.
    """
    # Floor division keeps the range() step an integer.
    hop = fftsize // overlap
    # np.hanning replaces the scipy.hanning alias dropped from SciPy;
    # better reconstruction with the +1)[:-1] trick (periodic window).
    w = np.hanning(fftsize + 1)[:-1]
    l = []
    for i in range(0, len(x) - fftsize, hop):
        power = np.abs(np.fft.rfft(w * x[i:i + fftsize]))**2
        l.append(power / np.max(power))
    return np.array(l)
def stft(x, framesamp):
    """Short-time Fourier transform with 50 % overlap and a Hann window.

    Args:
        x: 1-D array of samples.
        framesamp: frame length in samples; the hop is ``framesamp // 2``.

    Returns:
        2-D complex array (frames x ``framesamp`` bins).
    """
    # Floor division keeps the range() step an integer.
    hopsamp = framesamp // 2
    # np.hanning / np.fft.fft replace the aliases dropped from SciPy's
    # root (scipy.fft is a module now, not a function).
    w = np.hanning(framesamp)
    X = np.array([
        np.fft.fft(w * x[i:i + framesamp])
        for i in range(0, len(x) - framesamp, hopsamp)
    ])
    return X
def ramps(data, fs, duration=10, shape='raisedcosine', set='onoff'):
    '''Applies ramps to the onsets and/or offsets of a signal

    Parameters
    ----------
    data : array
        The input signal; ramps are applied along the first axis.
    fs : scalar
        The sampling frequency.
    duration : scalar
        The duration of each ramp, in ms [default = 10].
    shape : string
        Specifies the shape of the ramp. Possibilities include:
          'raisedcosine' [default]
          'hanning'
          'hamming'
          'linear'
    set : string
        Specifies where to apply ramps:
          'on' : apply to onset of signal only
          'off' : apply to offset only
          'onoff' : apply to both

    Returns
    -------
    y : array
        The ramped signal.

    Raises
    ------
    Exception
        If ``shape`` is not one of the names listed above.
    '''
    # np.int no longer exists; the builtin int does the same conversion.
    dur = int(np.round(np.float32(duration)*(np.float32(fs)/1000.)))
    wspace = 2 * dur
    # Compare strings with ==; `is` tests object identity and only works
    # by accident of interning.
    if shape == 'raisedcosine':
        rf = np.power((((np.cos(np.pi+2*np.pi*np.arange(0,wspace-1)/(wspace-1)))*.5)+.5),2)
    elif shape == 'hanning':
        rf = hanning(wspace)
    elif shape == 'hamming':
        rf = hamming(wspace)
    elif shape == 'linear':
        r = np.linspace(0, 1, dur)
        rf = np.concatenate((r, r[::-1]))
    else:
        raise Exception("shape not recognized")

    f_ramp = np.ones(data.shape[0])
    if set in ['on', 'onoff']:
        f_ramp[0:dur] = rf[0:dur]
    if set in ['off', 'onoff']:
        durp1 = dur-1
        # With durp1 == 0 the slice [-0:] would select the whole array.
        if durp1 > 0:
            f_ramp[-(durp1):] = rf[-(durp1):]
    return (data.T * f_ramp).T
def stft(x, fftsize=1024, overlap=4):
    """Short-time Fourier transform with a periodic Hann window.

    Args:
        x: 1-D array of samples.
        fftsize: frame length in samples.
        overlap: overlap factor; the hop is ``int(fftsize / overlap)``.

    Returns:
        2-D complex array (frames x ``fftsize // 2 + 1`` bins); the last
        full frame of ``x`` is included.
    """
    hop = int(fftsize / overlap)
    # np.hanning replaces the scipy.hanning alias dropped from SciPy;
    # better reconstruction with the +1)[:-1] trick (periodic window).
    w = np.hanning(fftsize + 1)[:-1]
    return np.array([
        np.fft.rfft(w * x[i:i + fftsize])
        for i in range(0, len(x) - fftsize + 1, hop)
    ])
def stft(x, fs, hop, width):
    """
    Compute the Short Time Fourier Transform of 'x', with sample rate 'fs'
    (Hz), window width 'width' (samples), and hop length 'hop' (samples)

    Ideally, width is even (this works better with the FFT)

    Returns a dict with the keys 'stft' (one ``ao.fft`` result per frame),
    'times' (frame centre times in seconds) and 'frequencies' (``fs * k /
    width`` for k = 0 .. width // 2).
    """
    # np.hanning / np.array replace the aliases dropped from SciPy's root.
    window = np.hanning(width)
    out = np.array([ao.fft(window*x[i:i+width])
                    for i in range(0, len(x)-width, hop)])
    times = np.arange(width/float(2*fs), len(x)/float(fs)-width/float(2*fs),
                      hop/float(fs))
    # Floor division: range()/arange() need an integer bin count.
    freqs = fs * (np.arange(0, width // 2 + 1) / float(width))
    return {'stft' : out, 'times' : times, 'frequencies' : freqs}
def reduce_noise(signal, noisy_signal, winsize=2**10, window=None):
    """
    Reduce noise in ``signal`` by spectral subtraction.

    Args:
        signal: samples to clean.
        noisy_signal: noise-only recording used for the power estimate.
        winsize: frame length in samples.
        window: analysis window; defaults to a Hann window of ``winsize``
            samples (identical to the former default for winsize 2**10).

    Returns:
        float32 array of ``len(signal)`` samples.
    """
    # The root-level SciPy aliases used before are gone; use NumPy.
    import numpy as np

    # Built per call: the former default ran at import time through the
    # removed sp.hanning alias and ignored a custom winsize.
    if window is None:
        window = np.hanning(winsize)

    method = SpectralSubtraction(winsize, window)
    out = np.zeros(len(signal), np.float32)
    # Welch spectrum of the noise, scaled by the squared window sum.
    power = sig.welch(noisy_signal, window=window, return_onesided=False,
                      scaling='spectrum')[1] * window.sum()**2
    # Frame count for half-window hops; floor division keeps it an int.
    nf = len(signal) // (winsize // 2) - 1
    for no in range(nf):
        s = get_frame(signal, winsize, no)
        add_signal(out, method.compute_by_noise_pow(s, power), winsize, no)
    return out
def istft(X, overlap=1):
    """Inverse STFT by weighted overlap-add with a periodic Hann window.

    Args:
        X: 2-D array of ``rfft`` frames (frames x ``fftsize // 2 + 1``).
        overlap: overlap factor used for analysis; the hop is
            ``fftsize // overlap``.

    Returns:
        Array of ``X.shape[0] * hop`` samples, normalised by the summed
        squared window wherever that sum is non-zero.
    """
    # The root-level SciPy aliases used before are gone; use NumPy.
    import numpy as np

    fftsize = (X.shape[1] - 1) * 2
    # Floor division keeps sizes and the range() step integers.
    hop = fftsize // overlap
    w = np.hanning(fftsize + 1)[:-1]
    x = np.zeros(X.shape[0] * hop)
    wsum = np.zeros(X.shape[0] * hop)
    for n, i in enumerate(range(0, len(x) - fftsize, hop)):
        x[i:i + fftsize] += np.real(np.fft.irfft(X[n])) * w  # overlap-add
        wsum[i:i + fftsize] += w ** 2.
    # Avoid dividing where no window weight was accumulated.
    pos = wsum != 0
    x[pos] /= wsum[pos]
    return x
def stft(x, fs, framesz, hop):
    """Short-time Fourier transform with a Hann window.

    Args:
        x: 1-D array of samples.
        fs: sampling frequency of ``x``.
        framesz: frame length in seconds.
        hop: hop length in seconds.

    Returns:
        2-D array with one ``fft`` result per frame.
    """
    # scipy.hanning / scipy.array are gone from SciPy's root; use NumPy.
    import numpy as np

    framesamp = int(framesz*fs)
    hopsamp = int(hop*fs)
    w = np.hanning(framesamp)
    X = np.array([fft(w*x[i:i+framesamp])
                  for i in range(0, len(x)-framesamp, hopsamp)])
    return X
def stft(x,framesz):
    """
    Get the stft of a signal x

    The signal is padded with ``framesz`` zeros on both sides so the whole
    signal can be reconstructed; frames overlap by half and are
    Hann-windowed.

    x : array_like
        The signal
    framesz : int
        The window/fft length in samples

    Returns a 2-D complex array (frames x ``framesz`` bins).
    """
    hop = int(float(framesz)/2)
    # Pad so we can reconstruct the whole signal
    x = numpy.append(numpy.zeros(framesz), x)
    x = numpy.append(x, numpy.zeros(framesz))
    # numpy replaces the hanning / array / fft aliases dropped from SciPy.
    w = numpy.hanning(framesz)
    X = numpy.array([numpy.fft.fft(w*x[i:i+framesz])
                     for i in range(hop, len(x)-(framesz), hop)])
    return X
def calc_noise(filepath):
    """Sum the power spectra of the first frames of a recording.

    The file is read with ``read_signal`` and at most the first 20
    half-overlapping, Hann-windowed frames of ``WINSIZE`` samples are
    accumulated.

    Returns:
        float array of ``WINSIZE`` summed squared FFT magnitudes.
    """
    # The root-level SciPy aliases used before are gone; use NumPy.
    import numpy as np

    signal, params = read_signal(filepath, WINSIZE)
    # Frame count for half-window hops; floor division keeps it an int.
    nf = len(signal) // (WINSIZE // 2) - 1
    noise_sum = np.zeros(WINSIZE, np.float32)
    window = np.hanning(WINSIZE)
    # Only the first 20 frames contribute to the estimate.
    for no in range(min(nf, 20)):
        y = get_frame(signal, WINSIZE, no)
        Yr = np.absolute(np.fft.fft(y * window))
        noise_sum = noise_sum + Yr**2
    return noise_sum
def stft(samples, samplerate, framesz=0.050, hop=0.025):
    """Compute a spectrogram (STFT) with a Hanning window.

    Args:
        samples: 1-D array of samples.
        samplerate: Sampling rate; multiplies ``framesz`` and ``hop`` to
            obtain sample counts.
        framesz: Frame size, in the time unit matching ``samplerate``.
        hop: Hop size, in the time unit matching ``samplerate``.

    Returns:
        The transposed STFT: one column per frame, one row per FFT bin.
    """
    framesamp = int(framesz * samplerate)
    hopsamp = int(hop * samplerate)

    # ``scipy.fft`` is a module in current SciPy (not a callable) and the
    # ``scipy.hanning`` / ``scipy.array`` aliases are gone, so use NumPy.
    w = np.hanning(framesamp)
    X = np.array([np.fft.fft(w * samples[i:i + framesamp])
                  for i in range(0, len(samples) - framesamp, hopsamp)])

    # Put time on the x axis.
    return np.transpose(X)
def test():
    """Show the magnitude spectrogram of ``./golf_D.wav``.

    Reads the file with ``wavread``, computes its STFT with a 1024-sample
    Hanning window and a hop of 1/8 window, keeps the non-negative
    frequency bins and displays the result with ``imshow_sox``.
    """
    # wavfile = "../wav/aiueo.wav"
    wavfile = "./golf_D.wav"
    # data, fs, enc = wavread(wavfile)
    data, fs = wavread(wavfile)

    # STFT
    fftLen = 1024
    win = hanning(fftLen)
    # Floor division: hop size and slice bound must be integers on Python 3.
    step = fftLen // 8
    spectrogram = abs(stft(data, win, step)[:, :fftLen // 2 + 1]).T

    # Display
    fig = pl.figure()
    fig.patch.set_alpha(0.)
    imshow_sox(spectrogram)
    pl.tight_layout()
    pl.show()
def spectre(audio, win=512, poverlap=.75):
    """Compute magnitude and phase spectrograms of ``audio``.

    Args:
        audio: 1-D array of samples.
        win: Frame (and FFT) length in samples.
        poverlap: Fraction of ``win`` shared by consecutive frames.

    Returns:
        A tuple ``(spec, phase)`` with one column per frame. ``spec`` holds
        the magnitudes of the first ``win // 2 + 1`` bins; ``phase`` holds
        the angles of all ``win`` bins. Both have zero columns when
        ``audio`` is shorter than ``win``.

    Raises:
        ValueError: If ``poverlap`` leaves no positive hop between frames.
    """
    # Integer overlap/hop: floats cannot be used as shapes or slice bounds.
    overlap = int(np.floor(win * poverlap))
    step = win - overlap
    if step <= 0:
        raise ValueError("poverlap must leave a positive hop between frames")

    nfft = win
    n_samples = len(audio)
    # Hanning window without its zero-valued end points.
    w = np.hanning(win + 2)[1:-1]

    # Number of complete frames; never negative for short input.
    n_frames = max(0, (n_samples - win) // step + 1)
    spec = np.zeros((nfft, n_frames))
    phase = np.zeros_like(spec)

    # Iterating over complete frames only avoids reading a truncated frame
    # when ``n_samples - win + 1`` is a multiple of the hop.
    for count in range(n_frames):
        position = count * step
        tmp_fft = np.fft.fft(audio[position:position + win] * w, nfft)
        spec[:, count] = np.abs(tmp_fft)
        phase[:, count] = np.angle(tmp_fft)

    # Keep the non-redundant half of the magnitude spectrum,
    # i.e. ceil((nfft + 1) / 2) rows.
    spec = spec[:nfft // 2 + 1, :]
    return spec, phase
def stft(x, fftsize=1024, overlap=4, ban=0):
    """Short-time Fourier transform returning peak-normalised power.

    :param x: Signal (1-D array).
    :param fftsize: Window length in samples.
    :param overlap: Ratio of window length to hop size
        (``hop = int(fftsize / overlap)``).
    :param ban: Number of lowest frequency bins to set to zero.
    :return: Array with one row per frame; each row is the power spectrum
        divided by its own maximum. Frames whose power is zero everywhere
        are returned as zeros instead of NaN.
    """
    hop = int(fftsize / overlap)
    # The "+1 ... [:-1]" trick yields a periodic window, which gives a
    # better reconstruction.
    w = np.hanning(fftsize + 1)[:-1]

    frames = []
    for i in range(0, len(x) - fftsize, hop):
        v = np.fft.rfft(w * x[i:i + fftsize])
        if ban > 0:
            v[:ban] = 0
        power = np.abs(v) ** 2
        peak = np.max(power)
        # Guard against 0/0 on silent (or fully banned) frames.
        if peak > 0:
            power = power / peak
        frames.append(power)
    return np.array(frames)
def plot_spectrogram(waveform, sampling_rate, window_name, filename):
    """Plot a spectrogram of ``waveform`` and save it as a PNG file.

    Args:
        waveform: Samples passed to ``plt.specgram``.
        sampling_rate: Sampling rate of ``waveform``.
        window_name: One of "hanning", "hamming", "gaussian", "blackman",
            "trianglar" or "rectanglar". Any other value prints a message
            and exits the interpreter.
        filename: Source file name; it appears in the plot title, and the
            second "/"-separated component (without extension) names the
            output image under ``graph/spectrogram/``.
    """
    # Local import: ``scipy.hanning`` / ``scipy.hamming`` /
    # ``scipy.blackman`` were NumPy aliases that are gone from current SciPy.
    import numpy as np

    window_duration = 40.0 * 1.0e-3  # window length, in seconds
    window_shift = 5.0 * 1.0e-3  # distance the window slides, in seconds
    window_size = int(window_duration * sampling_rate)  # samples per window
    # Overlap between adjacent windows, in samples.
    window_overlap = int((window_duration - window_shift) * sampling_rate)

    # Build the window itself.
    if window_name == "hanning":
        window = np.hanning(window_size)  # Hanning window
    elif window_name == "hamming":
        window = np.hamming(window_size)  # Hamming window
    elif window_name == "gaussian":
        # NOTE(review): presumably not available at the scipy top level;
        # scipy.signal.windows.gaussian also needs a std argument - confirm.
        window = scipy.gaussian(window_size)  # Gaussian window??
    elif window_name == "blackman":
        window = np.blackman(window_size)  # Blackman window
    elif window_name == "trianglar":
        # NOTE(review): presumably scipy.signal.windows.triang - confirm.
        window = scipy.triang(window_size)  # triangular window??
    elif window_name == "rectanglar":
        # NOTE(review): presumably scipy.signal.windows.boxcar - confirm.
        window = scipy.rectang(window_size)  # rectangular window??
    else:
        # Call form works on both Python 2 and Python 3.
        print("The window function name is wrong.")
        exit()

    _spectrum, _freqs, times, _image = plt.specgram(
        waveform,
        NFFT=window_size,
        Fs=sampling_rate,
        window=window,
        noverlap=window_overlap
    )

    plt.title("Spectrogram [" + window_name + "] (" + filename + ")")
    plt.xlabel("Time[sec]")
    plt.ylabel("Frequency[Hz]")
    plt.xlim([0, times[-1]])
    plt.ylim([0, 5000])
    plt.savefig("graph/spectrogram/" + filename.split("/")[1].split(".")[0]
                + "_" + window_name + ".png")
def vad(soundfile, noisefile=None):
    """Run LTSD-based segmentation on a sound file.

    Args:
        soundfile: Path read with ``speech.read_soundfile``.
        noisefile: Optional path to a second recording; when given, its
            samples are passed to ``LTSD`` as ``init_noise``.

    Returns:
        A flat list: the segment boundaries returned by ``ltsd.segments``,
        converted with ``librosa.core.samples_to_time`` and flattened,
        followed by the total duration of ``soundfile`` in seconds.
    """
    # Local import: ``sp.hanning`` was a NumPy alias that is gone from
    # current SciPy.
    import numpy as np

    signal, rate = speech.read_soundfile(soundfile)
    if noisefile is not None:
        noise, _nrate = speech.read_soundfile(noisefile)
        print("found noisefile: "+noisefile)
    else:
        noise = None

    seconds = float(len(signal)) / rate
    winsize = librosa.time_to_samples(float(WINMS) / 1000, rate)[0]
    window = np.hanning(winsize)

    ltsd = LTSD(winsize, window, 5, init_noise=noise)
    res, threshold, _nstart, _nend = ltsd.compute(signal)
    # ``segments`` returns a one-element sequence; unpack its only item.
    segments, = ltsd.segments(res, threshold)
    segments = librosa.core.samples_to_time(segments, rate).tolist()

    indexes = []
    for s in segments:
        indexes += s
    indexes.append(seconds)
    return indexes
def inv_spectre(spec, phase, poverlap=.75):
    """Rebuild a time signal from magnitude and phase spectrograms.

    Args:
        spec: 2-D magnitude array with ``win // 2 + 1`` rows and one column
            per frame; the frame length is ``(spec.shape[0] - 1) * 2``.
        phase: 2-D phase array with ``win`` rows and one column per frame.
        poverlap: Fraction of ``win`` shared by consecutive frames.

    Returns:
        A 1-D array of ``(n_frames - 1) * (win - overlap) + win`` samples:
        the overlap-added windowed frames divided by
        ``overlap / win * (overlap-added window)``.
    """
    win = (spec.shape[0] - 1) * 2
    nfft = win
    # Integer overlap/hop: floats cannot be used as sizes or slice bounds.
    overlap = int(np.floor(win * poverlap))
    step = win - overlap

    # Rebuild the full spectrum by mirroring the interior bins.
    mirrored = spec[::-1]
    spec = np.concatenate((spec, mirrored[1:-1, :]))
    n_frames = spec.shape[1]

    # Hanning window without its zero-valued end points.
    w = np.hanning(win + 2)[1:-1]

    # Inverse transform of each frame, windowed again before overlap-add.
    signal = np.zeros_like(spec)
    for n in range(n_frames):
        frame_spectrum = np.exp(1j * phase[:, n]) * spec[:, n]
        signal[:, n] = np.real(np.fft.ifft(frame_spectrum, nfft)) * w

    f_signal = np.zeros((n_frames - 1) * step + win)
    normalization = np.zeros_like(f_signal)
    for k in range(n_frames):
        start = k * step
        f_signal[start:start + win] += signal[:, k]
        normalization[start:start + win] += w

    # float() keeps the ratio a true division on Python 2 as well.
    return f_signal / (float(overlap) / win * normalization)
def tfplot(data, Fs = 44100, color = 'b', octbin = 100, avg = 'comp'):
    """Plot a log-frequency smoothed magnitude response of ``data``.

    ``data`` is multiplied by the falling half of a Hanning window,
    transformed with a 2**18-point FFT, and averaged over logarithmically
    spaced bins (``octbin`` bins per octave) before being drawn with
    ``plt.semilogx`` in dB.

    Args:
        data: 1-D array of samples (its ``size`` attribute is used).
        Fs: Sampling rate in Hz.
        color: Format/colour argument forwarded to ``plt.semilogx``.
        octbin: Number of logarithmic bins per octave.
        avg: Averaging mode inside each bin: ``'comp'`` (magnitude of the
            complex mean), ``'abs'`` (mean magnitude) or ``'power'``
            (root of the mean squared magnitude).

    Raises:
        ValueError: If ``avg`` is not one of the supported modes.
    """
    # Strings must be compared by value, not identity; an unknown mode used
    # to leave the output buffer uninitialised.
    if avg == 'comp':
        def average(band):
            return np.abs(np.mean(band))
    elif avg == 'abs':
        def average(band):
            return np.mean(np.abs(band))
    elif avg == 'power':
        def average(band):
            return np.sqrt(np.mean(np.abs(np.power(band, 2))))
    else:
        raise ValueError("avg must be 'comp', 'abs' or 'power'")

    FFTSIZE = 2**18
    half = FFTSIZE // 2  # integer: used as a slice bound below
    logfact = 2**(1./octbin)
    LOGN = int(np.floor(np.log(Fs / 2.0) / np.log(logfact)))
    # Logarithmic scale from 1 Hz to Fs/2.
    logscale = np.power(logfact, np.arange(LOGN))

    # Create a half Hanning window (the falling half).
    WL = data.size
    endwin = np.hanning(WL * 2)[WL:2 * WL]
    tf = fft(data * endwin, FFTSIZE)
    compamp = tf[:half]

    logmagn = np.empty(LOGN)
    fstep = Fs / np.float64(FFTSIZE)
    half_bin = np.sqrt(logfact)
    for k in range(logscale.size):
        # Find the start and end positions of the logarithmic bin, clamped
        # to the available FFT bins.
        start = int(np.round(logscale[k] / half_bin / fstep))
        start = min(max(start, 1), half)
        stop = int(np.round(logscale[k] * half_bin / fstep)) - 1
        stop = min(max(stop, start, 1), half)

        # Average the transfer function over the bin.
        logmagn[k] = average(compamp[start - 1:stop])

    # Plotting
    plt.semilogx(logscale, 20*np.log10(logmagn), color)