def __init__(self, calibration_signal, step_size, pb_ff, nfft, shift=None, win=None):
    """
    Compute the fixed beamformer from a calibration recording and
    initialise the adaptive and projection-back state.

    Parameters
    ----------
    calibration_signal : numpy array
        Multichannel calibration signal; the size of its second axis is
        stored as the number of channels.
    step_size : float
        Step size for the LMS.
    pb_ff : float
        Forgetting factor for the projection back.
    nfft : int
        FFT size used for the STFT of the calibration signal.
    shift : int, optional
        STFT shift. Defaults to ``nfft // 2``.
    win : numpy array, optional
        Analysis window. Defaults to ``pra.hann(nfft)``.
    """
    self.step_size = step_size  # for the LMS
    self.pb_ff = pb_ff  # forgetting factor for the projection back
    self.nfft = nfft
    self.nchannel = calibration_signal.shape[1]

    if shift is None:
        shift = nfft // 2

    if win is None:
        win = pra.hann(nfft)

    # Compute the fixed beamformer
    X = pra.transform.analysis(calibration_signal, nfft, shift, win=win)
    self.fixed_weights = calibration(X)[1:, :]  # remove DC

    # gsc adaptive weights
    self.adaptive_weights = np.zeros_like(self.fixed_weights)

    # projection back weights
    n_bins = self.fixed_weights.shape[0]
    self.pb_den = np.ones(n_bins, dtype=self.fixed_weights.dtype)
    # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the dtype
    # the old alias resolved to.
    self.pb_num = np.ones(n_bins, dtype=np.float64)
def with_half_overlap_no_filter(D):
    """
    Push the module-level signal ``x`` through a half-overlap STFT
    analysis/synthesis chain, ``hop`` samples at a time, and return the
    largest absolute reconstruction error.

    D is the number of channels to use; ``D == 1`` selects a 1-D signal.
    """
    x_local = x[:, 0] if D == 1 else x[:, :D]

    # parameters
    block_size = 512  # make sure the FFT size is a power of 2
    hop = block_size // 2  # half overlap

    # Create the STFT object (Hann analysis window, no synthesis window)
    stft = pra.realtime.STFT(
        block_size,
        hop=hop,
        analysis_window=pra.hann(block_size),
        channels=D,
        transform=transform,
    )

    # buffer collecting the processed blocks
    processed_x = np.zeros(x_local.shape)
    total = x_local.shape[0]

    # process the signal for as long as more than one hop remains
    n = 0
    while total - n > hop:
        chunk = slice(n, n + hop)
        stft.analysis(x_local[chunk])  # go to frequency domain
        processed_x[chunk] = stft.synthesis()  # and straight back
        n += hop

    # the output lags the input by one hop
    return np.max(np.abs(x_local[: n - hop] - processed_x[hop:n]))
def apply_spectral_sub(
    noisy_signal, nfft=512, db_reduc=25, lookback=12, beta=30, alpha=1
):
    """
    Denoise a signal in one call with the spectral subtraction approach.

    The signal is processed in hops of ``nfft // 2`` samples through a
    streaming STFT; trailing samples that do not fill a whole hop are left
    at zero in the output.

    Parameters
    ----------
    noisy_signal : numpy array
        Real signal in time domain.
    nfft: int
        FFT size. Length of gain filter, i.e. the number of frequency bins, is
        given by ``nfft//2+1``.
    db_reduc: float
        Maximum reduction in dB for each bin.
    lookback: int
        How many frames to look back for the noise estimate.
    beta: float
        Overestimation factor to "push" the gain filter value (at each
        frequency) closer to the dB reduction specified by ``db_reduc``.
    alpha: float, optional
        Exponent factor to modify transition behavior towards the dB reduction
        specified by ``db_reduc``. Default is 1.

    Returns
    -------
    numpy array
        Enhanced/denoised signal.
    """
    from pyroomacoustics import hann
    from pyroomacoustics.transform import STFT

    hop = nfft // 2
    stft = STFT(
        nfft,
        hop=hop,
        analysis_window=hann(nfft, flag="asymmetric", length="full"),
        streaming=True,
    )
    scnr = SpectralSub(nfft, db_reduc, lookback, beta, alpha)

    processed_audio = np.zeros(noisy_signal.shape)
    n_samples = noisy_signal.shape[0]

    start = 0
    while n_samples - start >= hop:
        block = slice(start, start + hop)

        # SCNR in frequency domain
        stft.analysis(noisy_signal[block])
        gain_filt = scnr.compute_gain_filter(stft.X)

        # back to time domain
        processed_audio[block] = stft.synthesis(gain_filt * stft.X)

        start += hop

    return processed_audio
def __init__(self, calibration_signal, step_size, pb_ff, nfft, ds, shift=None, win=None):
    """
    Compute the fixed beamformer from a calibration recording and
    initialise the adaptive weights, projection-back state and running
    estimators.

    Parameters
    ----------
    calibration_signal : numpy array
        Multichannel calibration signal; the size of its second axis is
        stored as the number of channels.
    step_size : float
        Step size for the LMS.
    pb_ff : float
        Forgetting factor for the projection back.
    nfft : int
        FFT size used for the STFT of the calibration signal.
    ds : int
        Channel reduction factor: the adaptive weights and the covariance
        estimate are sized for ``nchannel // ds`` channels.
    shift : int, optional
        STFT shift. Defaults to ``nfft // 2``.
    win : numpy array, optional
        Analysis window. Defaults to ``pra.hann(nfft)``.
    """
    self.step_size = step_size  # for the LMS
    self.pb_ff = pb_ff  # forgetting factor for the projection back
    self.nfft = nfft
    self.nchannel = calibration_signal.shape[1]
    self.ds = ds

    if shift is None:
        shift = nfft // 2

    if win is None:
        win = pra.hann(nfft)

    # Compute the fixed beamformer
    X = pra.transform.analysis(calibration_signal, nfft, shift, win=win)
    self.fixed_weights = calibration(X)[1:, :]  # remove DC
    # inverse squared norm of the fixed weights, one value per frequency bin
    self.norm_weights = 1. / np.linalg.norm(
        self.fixed_weights, axis=1, keepdims=True)**2

    n_bins = self.fixed_weights.shape[0]
    n_reduced = self.nchannel // ds

    # gsc adaptive weights, first reduced channel initialised to one
    self.adaptive_weights = np.zeros(
        (n_bins, n_reduced), dtype=self.fixed_weights.dtype)
    self.adaptive_weights[:, 0] = 1.

    # projection back weights
    self.pb_den = np.ones(n_bins, dtype=self.fixed_weights.dtype)
    # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the dtype
    # the old alias resolved to.
    self.pb_num = np.ones(n_bins, dtype=np.float64)

    self.estimates = {
        'covmat': LeakyIntegration(
            0.9,  # average over this number of frames
            # outer product per bin: (nfreq, nchan, nchan)
            lambda X: X[:, :, None] * np.conj(X[:, None, :]),
            # start from a small scaled identity for each bin
            init=np.array([
                np.eye(self.nchannel // self.ds)
                for i in range(self.nfft // 2)
            ]) * 1e-3,
        ),
        'xcov': LeakyIntegration(
            0.9,
            lambda v: v[0] * np.conj(v[1][:, None]),
        ),
    }
def half_overlap(D):
    """
    Analyse and re-synthesise the module-level signal ``x`` with a
    half-overlap Hann analysis window (no synthesis window) and return the
    maximum reconstruction error in dB.

    D is the number of channels to use; ``D == 1`` selects a 1-D signal.
    """
    x_local = x[:, 0] if D == 1 else x[:, :D]
    hop = block_size // 2

    # analysis
    X = analysis(x_local, L=block_size, hop=hop, win=pra.hann(block_size))

    # synthesis
    x_r = synthesis(X, L=block_size, hop=hop)

    # the reconstruction is compared with a one-hop offset
    residual = np.abs(x_local[:-hop] - x_r[hop:])
    return pra.dB(np.max(residual))
def hop_one_sample(D):
    """
    Analyse and re-synthesise the module-level signal ``x`` with a hop of
    a single sample, using the synthesis window matched to the Hann
    analysis window, and return the maximum reconstruction error in dB.

    D is the number of channels to use; ``D == 1`` selects a 1-D signal.
    """
    x_local = x[:, 0] if D == 1 else x[:, :D]
    hop = 1

    # analysis
    analysis_win = pra.hann(block_size)
    X = analysis(x_local, L=block_size, hop=hop, win=analysis_win)

    # synthesis
    synthesis_win = pra.transform.compute_synthesis_window(analysis_win, hop)
    x_r = synthesis(X, L=block_size, hop=hop, win=synthesis_win)

    # offset between input and reconstruction used for the comparison
    lag = block_size - hop
    residual = np.abs(x_local[:-lag] - x_r[lag:])
    return pra.dB(np.max(residual))
def append_one_sample(D):
    """
    Same round trip as the half-overlap test, but on a signal that is one
    sample short of a whole number of hops; returns the maximum
    reconstruction error in dB.

    D is the number of channels to use; ``D == 1`` selects a 1-D signal.
    """
    hop = block_size // 2
    n_frames = x.shape[0] // hop

    # drop one sample so the length is not a multiple of the hop
    truncated = x[:n_frames * hop - 1, :]
    x_local = truncated[:, 0] if D == 1 else truncated[:, :D]

    # analysis
    X = analysis(x_local, L=block_size, hop=hop, win=pra.hann(block_size))

    # synthesis
    x_r = synthesis(X, L=block_size, hop=hop)

    expected = x_local[:-block_size + hop]
    actual = x_r[block_size - hop:-1]
    return pra.dB(np.max(np.abs(expected - actual)))
def process_experiment_max_sinr(SIR, mic, args):
    """
    Run the max-SINR beamforming experiment for one SIR and one array.

    The recordings are read from disk, a sample-level VAD is derived from
    the LED signal, speech/noise covariance matrices are estimated from the
    VAD-gated STFT, the max-SINR beamformer is computed per frequency bin
    and applied, and the output is scored with ``bss_eval_images``.

    Parameters
    ----------
    SIR : int
        SIR of the recording to process; used to format file names and,
        when ``args.thresh`` is None, to look up the VAD threshold.
    mic : str
        Microphone array key; ``'pyramic'`` and ``'olympus'`` are handled.
    args : namespace
        Options read here: ``nfft``, ``vad_guard``, ``thresh``,
        ``synth_mix``, ``no_norm``, ``clip_gain``, ``save_sample`` and
        ``plot``.

    Returns
    -------
    tuple
        ``(SDR_out, SIR_out)`` of the speech source.
    """
    nfft = args.nfft
    vad_guard = args.vad_guard
    if args.thresh is None:
        vad_thresh = thresh_opt[SIR]
    else:
        vad_thresh = args.thresh

    # read_in the mix signals
    fs_led, leds = wavfile.read(
        file_pattern.format('camera_leds_zero_hold', 'mix', SIR))
    fs_snd, audio = wavfile.read(
        file_pattern.format(mic_choices[mic], 'mix', SIR))
    assert fs_led == fs_snd

    # read in the ref signals
    r, noise_ref = wavfile.read(
        file_pattern.format(mic_choices[mic], 'noise_ref', SIR))
    assert r == fs_snd
    r, speech_ref = wavfile.read(file_speech_ref.format(mic_choices[mic]))
    assert r == fs_snd
    r, leds_ref = wavfile.read(file_speech_ref.format('camera_leds_zero_hold'))
    assert r == fs_snd

    # In case of objective evaluation, we do an artificial mix
    if args.synth_mix:
        audio = noise_ref + speech_ref

    # get the geometry information to get nice plots.
    mics_loc = np.array(protocol['geometry']['microphones'][mic]['reference'])
    noise_loc = protocol['geometry']['speakers']['locations'][0]
    speech_loc = protocol['geometry']['speakers']['locations'][1]

    # the directions of arrival
    theta_speech = 0
    p0 = speech_loc - mics_loc
    p1 = noise_loc - mics_loc
    theta_noise = np.arccos(np.inner(p0, p1) / la.norm(p0) / la.norm(p1))
    print('Source separation', theta_noise / np.pi * 180)

    if mic == 'pyramic':
        # keep only the flat part of the array
        I = list(range(8, 16)) + list(range(24, 32)) + list(range(40, 48))

        audio = audio[:, I]
        noise_ref = noise_ref[:, I].copy()
        speech_ref = speech_ref[:, I].copy()

        mics_positions = mics_geom['pyramic'][I].copy()
        # place in room 2-806
        mics_positions -= np.mean(mics_positions, axis=0)[None, :]
        mics_positions[:, 2] -= np.max(mics_positions[:, 2])
        mics_positions += mics_loc

    elif mic == 'olympus':
        mics_positions = mics_geom['olympus'].copy() + mics_loc

    n_samples = audio.shape[0]  # shorthand
    n_channels = audio.shape[1]

    # perform VAD
    vad_snd = leds > vad_thresh

    # Now we want to make sure no speech goes in the estimation of the
    # noise covariance matrix. For that we remove the samples neighbouring
    # the detected speech.
    vad_guarded = vad_snd.copy()
    if vad_guard is not None:
        for i in range(len(vad_snd)):
            # Clamp the window start: a negative start index would wrap to
            # the end of the array and yield an (almost always) empty
            # slice, leaving the first ``vad_guard`` samples unguarded.
            lo = max(0, i - vad_guard)
            if np.any(vad_snd[lo:i + vad_guard]):
                vad_guarded[i] = True

    ##############################
    ## STFT and frame-level VAD ##
    ##############################

    print('STFT and stuff')
    sys.stdout.flush()

    engine = pra.realtime.STFT(nfft, nfft // 2, pra.hann(nfft),
                               channels=audio.shape[1])

    def analysis(x):
        # run the shared STFT engine and swap its first two axes
        engine.analysis(x)
        return np.moveaxis(engine.X, 1, 0)

    # Now compute the STFT of the microphone input
    X = analysis(audio)
    X_time = np.arange(1, X.shape[0] + 1) * (nfft / 2) / fs_snd

    X_speech = analysis(audio * vad_guarded[:, None])
    X_noise = analysis(audio * (1 - vad_guarded[:, None]))

    # The reference STFTs are not used below; the calls are kept so the
    # sequence of operations on the shared engine is unchanged.
    S_ref = analysis(speech_ref)
    N_ref = analysis(noise_ref)

    ##########################
    ## MAX SINR BEAMFORMING ##
    ##########################

    print('Max SINR beamformer computation')
    sys.stdout.flush()

    # covariance matrices from noisy signal
    Rs = np.einsum('i...j,i...k->...jk', X_speech, np.conj(X_speech))
    Rn = np.einsum('i...j,i...k->...jk', X_noise, np.conj(X_noise))

    # compute the MaxSINR beamformer: the eigenvector of the largest
    # generalized eigenvalue of (Rs, Rn) for every bin except DC.
    # ``subset_by_index`` replaces the ``eigvals`` keyword, which SciPy
    # deprecated in 1.5 and later removed.
    top = n_channels - 1
    w = [
        la.eigh(rs, b=rn, subset_by_index=[top, top])[1]
        for rs, rn in zip(Rs[1:], Rn[1:])
    ]
    w = np.squeeze(np.array(w))
    nw = la.norm(w, axis=1)
    w[nw > 1e-10, :] /= nw[nw > 1e-10, None]
    # prepend unit weights for the DC bin that was skipped above
    w = np.concatenate([np.ones((1, n_channels)), w], axis=0)

    if not args.no_norm:
        # normalize with respect to input signal
        z = compute_gain(w, X_speech, X_speech[:, :, 0],
                         clip_up=args.clip_gain)
        w *= z[:, None]

    ###########
    ## APPLY ##
    ###########

    print('Apply beamformer')
    sys.stdout.flush()

    # 2D beamformer
    mic_array = pra.Beamformer(mics_positions[:, :2].T,
                               fs=fs_snd, N=nfft, hop=nfft, zpb=nfft)
    mic_array.signals = audio.T
    mic_array.weights = w.T

    out = mic_array.process()

    # Signal alignment step
    ref = np.vstack([speech_ref[:, 0], noise_ref[:, 0]])
    # Not sure why the delay is sometimes negative here... Need to check more
    # (``np.float`` was removed in NumPy 1.24, hence ``np.float64``)
    delay = np.abs(
        int(pra.tdoa(out, speech_ref[:, 0].astype(np.float64), phat=True)))
    if delay > 0:
        out_trunc = out[delay:delay + ref.shape[1]]
        noise_eval = audio[:ref.shape[1], 0] - out_trunc
    else:
        out_trunc = np.concatenate(
            (np.zeros(-delay), out[:ref.shape[1] + delay]))
        noise_eval = audio[:ref.shape[1], 0] - out_trunc
    sig_eval = np.vstack([out_trunc, noise_eval])

    # We use the BSS eval toolbox
    metric = bss_eval_images(ref[:, :, None], sig_eval[:, :, None])

    # we are only interested in SDR and SIR for the speech source
    SDR_out = metric[0][0]
    SIR_out = metric[2][0]

    ##################
    ## SAVE SAMPLES ##
    ##################

    if args.save_sample is not None:
        # for informal listening tests, we need to high pass and normalize
        # the amplitude.
        upper = np.maximum(audio[:, 0].max(), out.max())
        sig_in = pra.highpass(audio[:, 0].astype(np.float64) / upper,
                              fs_snd, fc=150)
        sig_out = pra.highpass(out / upper, fs_snd, fc=150)

        f1 = os.path.join(args.save_sample,
                          '{}_ch0_SIR_{}_dB.wav'.format(mic, SIR))
        wavfile.write(f1, fs_snd, sig_in)
        f2 = os.path.join(args.save_sample,
                          '{}_out_SIR_{}_dB.wav'.format(mic, SIR))
        wavfile.write(f2, fs_snd, sig_out)

    ##########
    ## PLOT ##
    ##########

    if args.plot:
        plt.figure()
        plt.plot(out_trunc)
        plt.plot(speech_ref[:, 0])
        plt.legend(['output', 'reference'])

        # time axis for plotting
        led_time = np.arange(leds.shape[0]) / fs_led + 1 / (2 * fs_led)
        audio_time = np.arange(n_samples) / fs_snd

        plt.figure()
        plt.plot(led_time, leds, 'r')
        plt.title('LED signal')

        # match the scales of VAD and light to sound before plotting
        q_vad = np.max(audio)
        q_led = np.max(audio) / np.max(leds)

        plt.figure()
        plt.plot(audio_time, audio[:, 0], 'b')
        plt.plot(led_time, leds * q_led, 'r')
        plt.plot(audio_time, vad_snd * q_vad, 'g')
        plt.plot(audio_time, vad_guarded * q_vad, 'g--')
        plt.legend(['audio', 'VAD'])
        plt.title('LED and audio signals')

        plt.figure()
        a_time = np.arange(audio.shape[0]) / fs_snd
        plt.plot(a_time, audio[:, 0])
        plt.plot(a_time, out_trunc)
        plt.legend(['channel 0', 'beamformer output', 'speech reference'])

        plt.figure()
        mic_array.plot_beam_response()
        plt.vlines(
            [180 + np.degrees(theta_speech), 180 - np.degrees(theta_noise)],
            0, nfft // 2)

        room = pra.ShoeBox(protocol['geometry']['room'][:2],
                           fs=16000, max_order=1)

        room.add_source(noise_loc[:2])  # noise
        room.add_source(speech_loc[:2])  # speech
        room.add_source(
            protocol['geometry']['speakers']['locations'][1][:2])  # signal

        room.add_microphone_array(mic_array)
        room.plot(img_order=1, freq=[800, 1000, 1200, 1400, 1600, 2500, 4000])

        plt.figure()
        mic_array.plot()

        plt.show()

    # Return SDR and SIR
    return SDR_out, SIR_out
# fix the randomness for repeatability np.random.seed(10) # set the source powers, the first one is half source_std = np.ones(n_sources_target) source_std[0] /= np.sqrt(2.0) SIR = 10 # dB SNR = ( 60 ) # dB, this is the SNR with respect to a single target source and microphone self-noise # STFT parameters framesize = 4096 win_a = pra.hann(framesize) win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2) # algorithm parameters n_iter = 51 n_nmf_sub_iter = 20 sparse_reg = 0.0 # pre-emphasis of blinky signals pre_emphasis = False # Geometry of the room and location of sources and microphones room_dim = np.array([10, 7.5, 3]) mic_locs = np.vstack(( pra.circular_2D_array([4.1, 3.76], n_mics, np.pi / 2, 0.02),
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from scipy.linalg import toeplitz
from scipy.io import wavfile
from scipy.signal import resample, fftconvolve

import pyroomacoustics as pra

# --- Spectrogram figure properties ---
figsize = (15, 7)  # figure size
fft_size = 512  # fft size for analysis
fft_hop = 8  # hop between analysis frame
fft_zp = 512  # zero padding
# Hann window of length fft_size followed by fft_zp zeros
analysis_window = np.concatenate((pra.hann(fft_size), np.zeros(fft_zp)))
t_cut = 0.83  # length in [s] to remove at end of signal (no sound)

# --- Some simulation parameters ---
Fs = 8000
t0 = 1. / (Fs * np.pi * 1e-2)  # starting time function of sinc decay in RIR response
absorption = 0.90
max_order_sim = 10
sigma2_n = 5e-7

# --- Room 1 : Shoe box ---
room_dim = [4, 6]

# the good source is fixed for all
good_source = np.array([1, 4.5])  # good source
normal_interferer = np.array([2.8, 4.3])  # interferer
""" Length of filter in time domain = <fft_size> / <samp_freq> * <num_taps> """ # the unknown filters in the frequency domain num_bands = fft_length//2+1 W = np.random.randn(num_taps,num_bands) + \ 1j*np.random.randn(num_taps,num_bands) W /= np.linalg.norm(W, axis=0) # create a known driving signal x = np.random.randn(n_samples) # take to STFT domain window = pra.hann(fft_length) # the analysis window hop = fft_length//2 stft_in = pra.transform.STFT(fft_length, hop=hop, analysis_window=window, channels=1) stft_out = pra.transform.STFT(fft_length, hop=hop, analysis_window=window, channels=1) n = 0 num_blocks = 0 X_concat = np.zeros((num_bands,n_samples//hop),dtype=np.complex64) while n_samples - n > hop: stft_in.analysis(x[n:n+hop,]) X_concat[:,num_blocks] = stft_in.X n += hop
def createroom(amBird, saBird, noises, mic_p, mic_d, sour_p, sour_d,
               callback_mix, roomdim, absorption, max_order, n_mics, angle):
    """
    Simulate a shoebox room with two target sources and three interferers,
    run the selected blind source separation algorithm on the microphone
    signals, and score the separated targets.

    Parameters
    ----------
    amBird, saBird
        The two target recordings, passed to ``wav_read_center``.
    noises
        Indexable with at least three interferer recordings.
    mic_p, mic_d
        Forwarded to ``semi_circle_layout`` as its first and third
        positional arguments.
    sour_p, sour_d, callback_mix
        Accepted for interface compatibility; not used by this function.
    roomdim
        Room dimensions handed to ``pra.ShoeBox``.
    absorption, max_order
        Forwarded to ``pra.ShoeBox``.
    n_mics : int
        Number of microphones; must be at least the number of targets (2).
    angle : float
        Forwarded to ``semi_circle_layout``; also sets the layout rotation
        ``pi - angle / 2``.

    Returns
    -------
    tuple
        ``(mics, y_hat, sir, sdr)``: the microphone signals normalized to
        16 bit and transposed, the separated signals reordered by the
        permutation from ``bss_eval_sources``, and the SIR and SDR.
    """
    np.random.seed(10)

    # STFT parameters
    framesize = 4096
    hop = framesize // 2
    win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, hop)

    # OGIVE parameters
    ogive_mu = 0.1
    ogive_update = "switching"
    ogive_iter = 2000

    # separation parameters
    algo = algo_choices[0]
    n_iter = 60
    dist = "gauss"  # gauss or laplace

    # simulation parameters
    fs = 44100
    snr = 60
    sinr = 10
    n_sources = 2 + 3
    n_sources_target = 2
    assert n_sources_target <= n_mics, "More sources than microphones is not supported"

    # room size
    room_dim = roomdim

    # microphone positions
    rot = angle
    offset = np.pi - rot / 2
    mic_locs = semi_circle_layout(mic_p, rot, mic_d, n_mics, rot=offset)

    # target positions
    target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
    # interferer positions
    interferer_locs = random_layout([16, 2, 6],
                                    n_sources - n_sources_target,
                                    offset=[5, 18, 3],
                                    seed=1)
    source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

    # load the audio
    wav_files = [amBird, saBird, noises[0], noises[1], noises[2]]
    signals = wav_read_center(wav_files, seed=123)

    # create the room
    room = pra.ShoeBox(room_dim,
                       fs=fs,
                       absorption=absorption,
                       max_order=max_order,
                       air_absorption=True,
                       humidity=50)

    # add the sources
    for sig, loc in zip(signals, source_locs.T):
        room.add_source(loc, signal=sig)

    # add the microphones
    room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

    # per-source signals at the microphones, before mixing
    premix = room.simulate(return_premix=True)

    # Normalize the signals so that they all have unit variance at the
    # reference microphone
    ref_mic = 0
    p_mic_ref = np.std(premix[:, ref_mic, :], axis=1)
    premix /= p_mic_ref[:, None, None]
    sources_var = np.ones(n_sources_target)
    # scale to pre-defined variance
    premix[:n_sources_target, :, :] *= np.sqrt(sources_var[:, None, None])

    # compute noise variance
    sigma_n = np.sqrt(10**(-snr / 10) * np.sum(sources_var))

    # now compute the power of interference signal needed to achieve
    # desired SINR
    sigma_i = np.sqrt(
        np.maximum(0, 10**(-sinr / 10) * np.sum(sources_var) - sigma_n**2) /
        (n_sources - n_sources_target))
    premix[n_sources_target:, :, :] *= sigma_i
    background = np.sum(premix[n_sources_target:, :, :], axis=0)

    # NOTE(review): separation runs on the signals stored in the room's
    # microphone array, not on the sum of the rescaled ``premix`` — the
    # rescaled premix only feeds the references below. Confirm intended.
    mics_signals = room.mic_array.signals
    print("Simulation done.")

    # references: one row per target plus one for the background
    ref = np.zeros((n_sources_target + 1, premix.shape[2], premix.shape[1]),
                   dtype=premix.dtype)
    ref[:n_sources_target, :, :] = premix[:n_sources_target, :, :].swapaxes(
        1, 2)
    ref[n_sources_target, :, :] = background.T

    convergence_callback = None

    # START BSS
    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mics_signals.T,
                                   framesize,
                                   hop,
                                   win=win_a).astype(np.complex128)
    X_mics = X_all[:, :, :n_mics]

    # Run BSS
    if algo == "auxiva":
        # AuxIVA is run through overiva without n_src
        Y = overiva(
            X_mics,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "auxiva_pca":
        # Run AuxIVA with PCA
        Y = auxiva_pca(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "overiva":
        # Run OverIVA
        Y = overiva(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ilrma":
        # Run ILRMA
        Y = pra.bss.ilrma(
            X_mics,
            n_iter=n_iter,
            n_components=2,
            proj_back=True,
            callback=convergence_callback,
        )
    elif algo == "ogive":
        # Run OGIVE
        Y = ogive(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ogive_matlab":
        # Run OGIVE through the Matlab wrapper
        Y = ogive_matlab_wrapper(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    else:
        raise ValueError("No such algorithm {}".format(algo))

    # Run iSTFT
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, hop,
                                    win=win_s)[:, None]
        y = y.astype(np.float64)
    else:
        y = pra.transform.synthesis(Y, framesize, hop,
                                    win=win_s).astype(np.float64)

    # If some of the output are uniformly zero, just add a bit of noise to
    # compare
    for k in range(y.shape[1]):
        if np.sum(np.abs(y[:, k])) < 1e-10:
            y[:, k] = np.random.randn(y.shape[0]) * 1e-10

    # For conventional methods of BSS, reorder the signals by decreasing
    # power
    if algo != "blinkiva":
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    # Compare SIR; the separated signals are read with a half-frame offset
    m = np.minimum(y.shape[0] - hop, ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(
        ref[:n_sources_target, :m, 0],
        y[hop:m + hop, :n_sources_target].T,
    )

    # reorder the vector of reconstructed signals
    y_hat = y[:, perm]

    return pra.normalize(mics_signals,
                         bits=16).astype(np.int16).T, y_hat, sir, sdr
def with_arbitrary_overlap_synthesis_window(D,
                                            num_frames=1,
                                            fixed_memory=False,
                                            streaming=True,
                                            overlap=0.5):
    """
    Round-trip the module-level signal ``x`` through an STFT with an
    arbitrary overlap and the matching synthesis window, and return the
    maximum absolute reconstruction error. Both signals are plotted when
    the error is above -10 dB.

    D            - number of channels
    num_frames   - number of frames handed to the STFT per call
    fixed_memory - when True, ``num_frames`` is passed to the STFT
                   constructor (real-time consideration)
    streaming    - whether or not to stitch between frames
    overlap      - fraction of the block shared by consecutive frames
    """
    x_local = x[:, 0] if D == 1 else x[:, :D]

    # parameters
    block_size = 512  # make sure the FFT size is a power of 2
    hop = int((1 - overlap) * block_size)  # hop implied by the overlap

    if not streaming:
        # keep exactly the samples covered by ``num_frames`` frames
        num_samples = (num_frames - 1) * hop + block_size
        x_local = x_local[:num_samples]

    analysis_window = pra.hann(block_size)
    synthesis_window = pra.realtime.compute_synthesis_window(
        analysis_window, hop)

    # Create the STFT object; the frame count is only fixed on request
    stft_kwargs = dict(
        hop=hop,
        channels=D,
        transform=transform,
        analysis_window=analysis_window,
        synthesis_window=synthesis_window,
        streaming=streaming,
    )
    if fixed_memory:
        stft_kwargs["num_frames"] = num_frames
    stft = STFT(block_size, **stft_kwargs)

    def show_mismatch(expected, actual):
        # plot the first channel of both signals for visual inspection
        import matplotlib.pyplot as plt
        if x_local.ndim == 1:
            plt.plot(expected)
            plt.plot(actual)
        else:
            plt.plot(expected[:, 0])
            plt.plot(actual[:, 0])
        plt.show()

    # collect the processed blocks
    processed_x = np.zeros(x_local.shape)

    if streaming:
        n = 0
        hop_frames = hop * num_frames
        # process the signals while full blocks are available
        while x_local.shape[0] - n > hop_frames:
            chunk = slice(n, n + hop_frames)
            stft.analysis(x_local[chunk])
            processed_x[chunk] = stft.synthesis()
            n += hop_frames

        expected = x_local[:n - block_size + hop]
        actual = processed_x[block_size - hop:n]
    else:
        stft.analysis(x_local)
        processed_x = stft.synthesis()

        L = block_size - hop
        expected = x_local[L:-L]
        actual = processed_x[L:]

    error = np.max(np.abs(expected - actual))
    if 20 * np.log10(error) > -10:
        show_mismatch(expected, actual)

    return error
from __future__ import division, print_function from unittest import TestCase import numpy as np import pyroomacoustics as pra tol = -80 # dB nfft = 128 D = 7 x = np.random.randn(nfft, D).astype('float32') X_numpy = np.fft.rfft(x, axis=0).astype('complex64') analysis_window = pra.hann(nfft) synthesis_window = pra.hann(nfft) def no_window(nfft, D, transform, axis=0): if D == 1: x_local = x[:, 0] X_local = X_numpy[:, 0] else: if axis == 0: x_local = x X_local = X_numpy else: x_local = x.T X_local = X_numpy.T # make object dft = pra.transform.DFT(nfft, D, transform=transform, axis=axis) # forward
def run(args, parameters):
    """
    This is the core loop of the simulation.

    One room is simulated, the sources are mixed at the requested SINR and
    every configured separation algorithm is run on the mixture. The
    evaluation callback fills the metric lists of each result as the
    algorithm iterates.

    Parameters
    ----------
    args : tuple
        ``(sinr, n_targets, n_interf, n_mics, dist_ratio, room_params,
        seed)``.
    parameters : dict
        Global configuration. Keys read here: ``"base_dir"``,
        ``"mix_params"``, ``"stft_params"``, ``"algorithm_kwargs"`` and
        ``"_results_dir"``.

    Returns
    -------
    list of dict
        One entry per algorithm that was run; empty when there are fewer
        microphones than targets.

    Notes
    -----
    ``stft_params["window"] == "hamming"`` selects a Hamming window; any
    other value gives the default Hann window. The earlier code tested for
    ``"hann"`` instead, so a configuration asking for a Hann window got a
    Hamming one.
    """
    # expand arguments
    sinr, n_targets, n_interf, n_mics, dist_ratio, room_params, seed = args

    n_sources = n_targets + n_interf

    # this is the underdetermined case. We don't do that.
    if n_mics < n_targets:
        return []

    # set the RNG seed
    rng_state = np.random.get_state()
    np.random.seed(seed)

    # get all the signals
    files_absolute = [
        os.path.join(parameters["base_dir"], fn)
        for fn in room_params["wav"][:n_sources]
    ]
    source_signals = wav_read_center(files_absolute, seed=123)

    # create the room
    room = pra.ShoeBox(**room_params["room_kwargs"])
    R = np.array(room_params["mic_array"])
    room.add_microphone_array(pra.MicrophoneArray(R[:, :n_mics], room.fs))
    source_locs = np.array(room_params["sources"])
    for n in range(n_sources):
        room.add_source(source_locs[:, n], signal=source_signals[n, :])

    # compute RIRs and RT60
    room.compute_rir()
    rt60 = np.median([
        pra.experimental.measure_rt60(room.rir[0][n], fs=room.fs)
        for n in range(n_targets)
    ])

    # signals after propagation but before mixing
    # (n_sources, n_mics, n_samples)
    premix = room.simulate(return_premix=True)
    n_samples = premix.shape[-1]

    # create the mix (n_mics, n_samples)
    # this routine will also resize the signals in premix
    mix = callback_noise_mixer(premix,
                               sinr=sinr,
                               n_src=n_targets + n_interf,
                               n_tgt=n_targets,
                               **parameters["mix_params"])

    # create the reference signals
    # (n_targets + 1, n_samples): the targets, then the summed interferers
    refs = np.zeros((n_targets + 1, n_samples))
    refs[:-1, :] = premix[:n_targets, parameters["mix_params"]["ref_mic"], :]
    refs[-1, :] = np.sum(premix[n_targets:, 0, :], axis=0)

    # STFT parameters
    framesize = parameters["stft_params"]["framesize"]
    hop = parameters["stft_params"]["hop"]
    if parameters["stft_params"]["window"] == "hamming":
        win_a = pra.hamming(framesize)
    else:  # default is Hann
        win_a = pra.hann(framesize)

    # START BSS
    ###########

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mix.T, framesize, hop, win=win_a)
    X_mics = X_all[:, :, :n_mics]

    # store results in a list, one entry per algorithm
    results = []

    for full_name, params in parameters["algorithm_kwargs"].items():

        name = params["algo"]
        kwargs = params["kwargs"]

        if (not bss.is_determined[name] and bss.is_dual_update[name]
                and n_targets == 1):
            # Overdetermined algorithms with dual updates cannot be used
            # in the single source case (they can extract at least two
            # sources)
            continue
        elif bss.is_single_source[name] and n_targets > 1:
            # doesn't work for multi source scenario
            continue
        elif bss.is_overdetermined[name] and n_targets == n_mics:
            # don't run the overdetermined stuff in determined case
            continue

        results.append({
            "algorithm": full_name,
            "n_targets": n_targets,
            "n_interferers": n_interf,
            "n_mics": n_mics,
            "rt60": rt60,
            "dist_ratio": dist_ratio,
            "sinr": sinr,
            "seed": seed,
            "sdr": [],
            "sir": [],  # to store the result
            "cost": [],
            "runtime": np.nan,
            "eval_time": np.nan,
            "n_samples": n_samples,
        })

        # this is used to keep track of time spent in the evaluation callback
        eval_time = []

        def cb(W, Y, source_model):
            # appends to the metric lists of the result being processed
            convergence_callback(
                W,
                Y,
                source_model,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                results[-1]["cost"],
                eval_time,
                refs,
                parameters["mix_params"]["ref_mic"],
                parameters["stft_params"],
                name,
                not bss.is_determined[name],
            )

        if "model" not in kwargs:
            local_model = bss.default.model
        else:
            local_model = kwargs["model"]

        # evaluate once before separation, with an identity demixing matrix
        cb(np.eye(n_mics)[None, :, :], X_mics, local_model)

        try:
            t_start = time.perf_counter()

            bss.separate(X_mics,
                         n_src=n_targets,
                         algorithm=name,
                         callback=cb,
                         proj_back=False,
                         **kwargs)

            t_finish = time.perf_counter()

            # the time spent in the callback is not counted as runtime
            results[-1]["eval_time"] = np.sum(eval_time)
            results[-1]["runtime"] = (t_finish - t_start -
                                      results[-1]["eval_time"])

        except Exception:
            # Deliberately broad: one failing algorithm must not abort the
            # whole simulation; the failure is written to file instead.

            # get the traceback
            tb = traceback.format_exc()

            report = {
                "algorithm": name,
                "n_src": n_targets,
                "kwargs": kwargs,
                "result": results[-1],
                "tb": tb,
            }

            pid = os.getpid()
            # report last sdr/sir as np.nan
            results[-1]["sdr"].append(np.nan)
            results[-1]["sir"].append(np.nan)

            # now write the problem to file
            fn_err = os.path.join(parameters["_results_dir"],
                                  "error_{}.json".format(pid))
            with open(fn_err, "a") as f:
                f.write(json.dumps(report, indent=4))
                f.write(",\n")

            # skip to next iteration
            continue

    # restore RNG former state
    np.random.set_state(rng_state)

    return results
from unittest import TestCase

import numpy as np
from scipy.signal import fftconvolve

import pyroomacoustics as pra

# fix seed for repeatability
np.random.seed(0)

# test dimensions
h_len = 30
x_len = 1000
SNR = 1000.0  # decibels

# candidate filters; the order of the random draws below is significant
h_lp = np.fft.irfft(np.ones(5), n=h_len)
h_rand = np.random.randn(h_len)
h_hann = pra.hann(h_len, flag="symmetric")

# driving signal and additive noise (sized like the full convolution)
x = np.random.randn(x_len)
noise = np.random.randn(x_len + h_len - 1)


def generate_signals(SNR, x, h, noise):
    """Convolve ``x`` with ``h`` and add ``noise`` scaled for ``SNR`` dB.

    Returns the noisy output together with the noise standard deviation.
    """
    # noise standard deviation
    sigma_noise = 10 ** (-SNR / 20.0)

    noisy = fftconvolve(x, h) + sigma_noise * noise

    return noisy, sigma_noise
from unittest import TestCase

import numpy as np
from scipy.signal import fftconvolve

import pyroomacoustics as pra

# fix seed for repeatability
np.random.seed(0)

# test dimensions
h_len = 30
x_len = 1000
SNR = 1000.  # decibels

# candidate filters; the order of the random draws below is significant
h_lp = np.fft.irfft(np.ones(5), n=h_len)
h_rand = np.random.randn(h_len)
h_hann = pra.hann(h_len, flag='symmetric')

# driving signal and additive noise (sized like the full convolution)
x = np.random.randn(x_len)
noise = np.random.randn(x_len + h_len - 1)


def generate_signals(SNR, x, h, noise):
    ''' Convolve ``x`` with ``h`` and add ``noise`` scaled for ``SNR`` dB.

    Returns the noisy output together with the noise standard deviation.
    '''
    # noise standard deviation
    sigma_noise = 10 ** (-SNR / 20.)

    noisy = fftconvolve(x, h) + sigma_noise * noise

    return noisy, sigma_noise
from __future__ import division, print_function import numpy as np from scipy.io import wavfile import matplotlib.pyplot as plt import pyroomacoustics as pra import os # filter to apply h_len = 99 h = np.ones(h_len) h /= np.linalg.norm(h) # parameters block_size = 512 - h_len + 1 # make sure the FFT size is a power of 2 hop = block_size // 2 # half overlap window = pra.hann(block_size, flag='asymmetric', length='full') # analysis window (no synthesis window) # open single channel audio file fn = os.path.join(os.path.dirname(__file__), 'input_samples', 'singing_8000.wav') fs, audio = wavfile.read(fn) # Create the STFT object stft = pra.transform.STFT(block_size, hop=hop, analysis_window=window, channels=1, streaming=True) # set the filter and the appropriate amount of zero padding (back) if h_len > 1:
def one_loop(args):
    """
    Run one simulated separation experiment and evaluate every configured algorithm.

    The function reads its configuration from the module-level ``parameters``
    dictionary and performs all imports locally, so it only relies on that one
    global being available where it runs.

    Parameters
    ----------
    args : tuple
        ``(n_targets, n_interferers, n_mics, sinr, wav_files, room_seed, seed)``.

    Returns
    -------
    list of dict
        One entry per algorithm that was run, holding the experiment
        description, the lists of SDR/SIR values collected by the callbacks,
        the runtime and the evaluation time. An empty list is returned when
        ``n_mics < n_targets``.

    Notes
    -----
    If an algorithm raises an exception, NaN is appended to its SDR/SIR lists,
    the partial result is appended to ``error_<pid>.json`` in
    ``parameters["_results_dir"]`` and the loop moves on to the next algorithm.
    """
    global parameters

    import time
    import numpy

    np = numpy

    import pyroomacoustics

    pra = pyroomacoustics

    import json
    import os
    import sys

    sys.path.append(parameters["base_dir"])

    from auxiva_pca import auxiva_pca, pca_separation
    from five import five
    from ive import ogive
    from overiva import overiva
    from pyroomacoustics.bss.common import projection_back
    from room_builder import callback_noise_mixer, random_room_builder

    # import samples helper routine
    from get_data import samples_dir

    sys.path.append(os.path.join(parameters['base_dir'], samples_dir))
    from generate_samples import wav_read_center

    n_targets, n_interferers, n_mics, sinr, wav_files, room_seed, seed = args

    # this is the underdetermined case. We don't do that.
    if n_mics < n_targets:
        return []

    # set MKL to only use one thread if present
    try:
        import mkl

        mkl.set_num_threads(1)
    except ImportError:
        pass

    # set the RNG seed (the former state is restored before returning)
    rng_state = np.random.get_state()
    np.random.seed(seed)

    # STFT parameters
    framesize = parameters["stft_params"]["framesize"]
    hop = parameters["stft_params"]["hop"]
    # Hamming is used only when explicitly requested; the test previously
    # compared against "hann", which swapped the two windows.
    if parameters["stft_params"]["window"] == "hamming":
        win_a = pra.hamming(framesize)
    else:  # default is Hann
        win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, hop)

    # Generate the audio signals

    # get the simulation parameters from the json file
    # Simulation parameters
    sources_var = np.ones(n_targets)

    # total number of sources
    n_sources = n_targets + n_interferers

    # Read the signals
    wav_files = [os.path.join(parameters["base_dir"], fn) for fn in wav_files]
    signals = wav_read_center(wav_files[:n_sources], seed=123)

    # Get a random room
    room, rt60 = random_room_builder(signals, n_mics, seed=room_seed, **parameters["room_params"])
    premix = room.simulate(return_premix=True)

    # mix the signal
    n_samples = premix.shape[2]
    mix = callback_noise_mixer(
        premix,
        sinr=sinr,
        diffuse_ratio=parameters["sinr_diffuse_ratio"],
        n_src=n_sources,
        n_tgt=n_targets,
        tgt_std=np.sqrt(sources_var),
        ref_mic=parameters["ref_mic"],
    )

    # sum up the background
    # shape (n_mics, n_samples)
    background = np.sum(premix[n_targets:n_sources, :, :], axis=0)

    # shape (n_targets+1, n_samples, n_mics)
    ref = np.zeros((n_targets + 1, premix.shape[2], premix.shape[1]), dtype=premix.dtype)
    ref[:n_targets, :, :] = premix[:n_targets, :, :].swapaxes(1, 2)
    ref[n_targets, :, :] = background.T

    synth = np.zeros_like(ref)

    # START BSS
    ###########

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mix.T, framesize, hop, win=win_a)
    X_mics = X_all[:, :, :n_mics]

    # convergence monitoring callback
    def convergence_callback(Y, X, n_targets, SDR, SIR, eval_time, ref, framesize, win_s, algo_name):
        t_in = time.perf_counter()

        # projection back
        z = projection_back(Y, X[:, :, 0])
        Y = Y * np.conj(z[None, :, :])

        from mir_eval.separation import bss_eval_sources

        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
        else:
            y = pra.transform.synthesis(Y, framesize, hop, win=win_s)

        # unless the algorithm is listed as overdetermined, sort the outputs
        # by decreasing standard deviation
        if algo_name not in parameters["overdet_algos"]:
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        m = np.minimum(y.shape[0] - hop, ref.shape[1])

        # the extra (background) channel is filled with the first output
        synth[:n_targets, :m, 0] = y[hop:m + hop, :n_targets].T
        synth[n_targets, :m, 0] = y[hop:m + hop, 0]

        sdr, sir, sar, perm = bss_eval_sources(ref[:n_targets + 1, :m, 0], synth[:, :m, 0])
        SDR.append(sdr[:n_targets].tolist())
        SIR.append(sir[:n_targets].tolist())

        t_out = time.perf_counter()
        eval_time.append(t_out - t_in)

    # store results in a list, one entry per algorithm
    results = []

    # compute the initial values of SDR/SIR
    init_sdr = []
    init_sir = []

    convergence_callback(X_mics, X_mics, n_targets, init_sdr, init_sir, [], ref, framesize, win_s, "init")

    for full_name, params in parameters["algorithm_kwargs"].items():

        name = params["algo"]
        kwargs = params["kwargs"]

        if name == "auxiva_pca" and n_targets == 1:
            # PCA doesn't work for single source scenario
            continue
        elif name in ["ogive", "five"] and n_targets != 1:
            # these algorithms are only run for a single target
            continue

        results.append({
            "algorithm": full_name,
            "n_targets": n_targets,
            "n_interferers": n_interferers,
            "n_mics": n_mics,
            "rt60": rt60,
            "sinr": sinr,
            "seed": seed,
            "sdr": [],
            "sir": [],  # to store the result
            "runtime": np.nan,
            "eval_time": np.nan,
            "n_samples": n_samples,
        })

        # this is used to keep track of time spent in the evaluation callback
        eval_time = []

        def cb(Y):
            convergence_callback(
                Y,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                eval_time,
                ref,
                framesize,
                win_s,
                name,
            )

        # avoid one computation by using the initial values of sdr/sir
        results[-1]["sdr"].append(init_sdr[0])
        results[-1]["sir"].append(init_sir[0])

        try:
            t_start = time.perf_counter()

            if name == "auxiva":
                # this calls full IVA when `n_src` is not provided
                Y = overiva(X_mics, callback=cb, **kwargs)

            elif name == "auxiva_pca":
                Y = auxiva_pca(X_mics, n_src=n_targets, callback=cb, proj_back=False, **kwargs)

            elif name == "overiva":
                Y = overiva(X_mics, n_src=n_targets, callback=cb, proj_back=False, **kwargs)

            elif name == "overiva2":
                Y = overiva(X_mics, n_src=n_targets, callback=cb, proj_back=False, **kwargs)

            elif name == "five":
                Y = five(X_mics, callback=cb, proj_back=False, **kwargs)

            elif name == "ilrma":
                Y = pra.bss.ilrma(X_mics, callback=cb, proj_back=False, **kwargs)

            elif name == "ogive":
                Y = ogive(X_mics, callback=cb, proj_back=False, **kwargs)

            elif name == "pca":
                Y = pca_separation(X_mics, n_src=n_targets)

            else:
                # unknown algorithm: the entry keeps only the initial values
                continue

            t_finish = time.perf_counter()

            # The last evaluation
            convergence_callback(
                Y,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                [],
                ref,
                framesize,
                win_s,
                name,
            )

            results[-1]["eval_time"] = np.sum(eval_time)
            results[-1]["runtime"] = t_finish - t_start - results[-1]["eval_time"]

        except Exception:
            # Best-effort: a failing algorithm must not abort the whole loop,
            # but KeyboardInterrupt/SystemExit are allowed to propagate.
            pid = os.getpid()
            # report last sdr/sir as np.nan
            results[-1]["sdr"].append(np.nan)
            results[-1]["sir"].append(np.nan)
            # now write the problem to file
            fn_err = os.path.join(parameters["_results_dir"], "error_{}.json".format(pid))
            with open(fn_err, "a") as f:
                f.write(json.dumps(results[-1], indent=4))
            # skip to next iteration
            continue

    # restore RNG former state
    np.random.set_state(rng_state)

    return results
def apply_iterative_wiener(noisy_signal, frame_len=512, lpc_order=20, iterations=2, alpha=0.8, thresh=0.01):
    """
    Denoise a signal in one call with the iterative Wiener filter.

    The signal is processed hop by hop (hop is half of ``frame_len``) through
    a streaming STFT with a Hann analysis window. Samples beyond the last
    complete hop are left at zero in the output.

    Parameters
    ----------
    noisy_signal : numpy array
        Real signal in time domain.
    frame_len : int
        Frame length in samples.
    lpc_order : int
        Number of LPC coefficients, passed to ``IterativeWiener``.
    iterations : int
        Number of filter update iterations per frame, passed to
        ``IterativeWiener``.
    alpha : float
        Smoothing factor for the noise level update, passed to
        ``IterativeWiener``.
    thresh : float
        Threshold separating (signal+noise) frames from (noise) frames,
        passed to ``IterativeWiener``.

    Returns
    -------
    numpy array
        Enhanced/denoised signal, same shape as ``noisy_signal``.
    """
    from pyroomacoustics import hann
    from pyroomacoustics.transform import STFT

    half_frame = frame_len // 2
    analysis_win = hann(frame_len, flag='asymmetric', length='full')
    stft = STFT(frame_len, hop=half_frame, analysis_window=analysis_win, streaming=True)
    wiener = IterativeWiener(frame_len, lpc_order, iterations, alpha, thresh)

    denoised = np.zeros(noisy_signal.shape)

    start = 0
    # consume the input one full hop at a time
    while start + half_frame <= noisy_signal.shape[0]:
        stop = start + half_frame

        # go to the frequency domain
        stft.analysis(noisy_signal[start:stop, ])
        filtered_dft = wiener.compute_filtered_output(
            current_frame=stft.fft_in_buffer,
            frame_dft=stft.X,
        )

        # and back to the time domain
        denoised[start:stop, ] = stft.synthesis(filtered_dft)

        start = stop

    return denoised
def createroom(mic_p, mic_d, sour_p, sour_d, callback_mix, roomdim, absorption, max_order, n_mics, angle):
    """
    Simulate a shoebox room with two sources, separate the mixture and save the result.

    The function builds the room, simulates the recording, runs the blind
    source separation algorithm selected by ``algo_choices[0]``, prints the
    SDR and SIR, and writes ``birdmix.wav`` and ``birdsep<i>.wav`` to the
    current directory. It returns nothing.

    It relies on several names defined elsewhere in the file (for instance
    ``algo_choices``, ``init``, ``init_choices``, ``amBird``, ``saBird``,
    ``semi_circle_layout``, ``random_layout``, ``wav_read_center`` and the
    separation routines).

    Parameters
    ----------
    mic_p, mic_d
        Forwarded to ``semi_circle_layout`` to place the microphones.
    sour_p, sour_d
        Not used in the body; the target locations are hard-coded.
    callback_mix
        Mixing callback passed to ``room.simulate``.
    roomdim
        Room dimensions passed to ``pra.ShoeBox``.
    absorption, max_order
        Passed to ``pra.ShoeBox``.
    n_mics
        Number of microphones; must be at least 2 (the number of targets).
    angle
        Forwarded to ``semi_circle_layout``; also used to derive the
        ``rot`` offset.
    """
    # seeds the global numpy RNG, which affects all later random draws
    np.random.seed(10)

    # STFT parameters (half-overlapping frames)
    framesize = 4096
    win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)

    # algorithm parameters
    # param ogive
    ogive_mu = 0.1
    ogive_update = "switching"
    ogive_iter = 2000
    SIR = 10  # dB
    SNR = (
        60
    )  # dB, this is the SNR with respect to a single target source and microphone self-noise

    ######## separation params #############
    algo = algo_choices[0]
    no_cb = True
    save = True
    n_iter = 60
    dist = "gauss"  # gauss or laplace
    ######## params set ##################
    fs = 44100
    n_sources = 2
    n_mics = n_mics
    n_sources_target = 2
    assert n_sources_target <= n_mics, "More sources than microphones is not supported"

    # set the source powers: every target gets the same value of one
    source_std = np.ones(n_sources_target)

    # room size
    room_dim = roomdim

    # microphone positions
    rot = angle
    offset = np.pi - rot / 2
    mic_locs = semi_circle_layout(mic_p, rot, mic_d, n_mics, rot=offset)  # microphone layout

    # target position
    target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
    # interference position (computed but not added to the room below)
    interferer_locs = random_layout([14, 0, 6], n_sources - n_sources_target, offset=[5, 20, 3], seed=1)
    source_locs = target_locs

    # audio loaded
    wav_files = [amBird, saBird]
    signals = wav_read_center(wav_files, seed=123)

    # create room
    room = pra.ShoeBox(room_dim, fs=44100, absorption=absorption, max_order=max_order, air_absorption=True, humidity=50)

    # add source
    for sig, loc in zip(signals, source_locs.T):
        room.add_source(loc, signal=sig)

    # add microphones
    room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

    # SIR is read here as a number; the name is rebound to a list further down
    callback_mix_kwargs = {
        "snr": SNR,
        "sir": SIR,
        "n_src": n_sources,
        "n_tgt": n_sources_target,
        "src_std": source_std,
        "ref_mic": 0,
    }

    # Run the simulation
    separate_recordings = room.simulate(
        callback_mix=callback_mix,
        callback_mix_kwargs=callback_mix_kwargs,
        return_premix=True,
    )
    mics_signals = room.mic_array.signals

    print("Simulation done.")
    # rt60 = room.measure_rt60()
    # print(rt60)

    # Monitor Convergence
    ref = np.moveaxis(separate_recordings, 1, 2)
    # pad the references with random signals when there are fewer of them than microphones
    if ref.shape[0] < n_mics:
        ref = np.concatenate(
            (ref, np.random.randn(n_mics - ref.shape[0], ref.shape[1], ref.shape[2])),
            axis=0,
        )

    SDR, SIR, cost_func = [], [], []
    # no convergence monitoring is performed
    convergence_callback = None

    # START BSS

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mics_signals.T, framesize, framesize // 2, win=win_a).astype(np.complex128)
    X_mics = X_all[:, :, :n_mics]

    tic = time.perf_counter()

    # Run BSS
    if algo == "auxiva":
        # Run AuxIVA
        Y = overiva(
            X_mics,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "auxiva_pca":
        # Run AuxIVA with PCA
        Y = auxiva_pca(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "overiva":
        # Run OverIVA
        Y = overiva(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ilrma":
        # Run ILRMA
        Y = pra.bss.ilrma(
            X_mics,
            n_iter=n_iter,
            n_components=2,
            proj_back=True,
            callback=convergence_callback,
        )
    elif algo == "ogive":
        # Run OGIVE
        Y = ogive(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ogive_matlab":
        # Run OGIVE (Matlab wrapper)
        Y = ogive_matlab_wrapper(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    else:
        raise ValueError("No such algorithm {}".format(algo))

    toc = time.perf_counter()

    # Run iSTFT
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, framesize // 2, win=win_s)[:, None]
        y = y.astype(np.float64)
    else:
        y = pra.transform.synthesis(Y, framesize, framesize // 2, win=win_s).astype(np.float64)

    # If some of the output are uniformly zero, just add a bit of noise to compare
    for k in range(y.shape[1]):
        if np.sum(np.abs(y[:, k])) < 1e-10:
            y[:, k] = np.random.randn(y.shape[0]) * 1e-10

    # For conventional methods of BSS, reorder the signals by decreasing power
    if algo != "blinkiva":
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    # Compare SIR
    # the first half frame of the output is skipped before comparing to the reference
    m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(
        ref[:n_sources_target, :m, 0],
        y[framesize // 2:m + framesize // 2, :n_sources_target].T,
    )

    # reorder the vector of reconstructed signals
    y_hat = y[:, perm]

    print("SDR:", sdr)
    print("SIR:", sir)

    #### save mix and separation #######
    if save:
        from scipy.io import wavfile

        # first channel of the mixture, normalized to 16 bits
        wavfile.write(
            "birdmix.wav",
            room.fs,
            (pra.normalize(mics_signals, bits=16).astype(np.int16).T)[:, 0],
        )
        # one file per separated signal
        for i, sig in enumerate(y_hat.T):
            wavfile.write(
                "birdsep{}.wav".format(i + 1),
                room.fs,
                pra.normalize(sig, bits=16).astype(np.int16).T,
            )
from __future__ import division, print_function import numpy as np from scipy.io import wavfile import matplotlib.pyplot as plt import pyroomacoustics as pra import os # filter to apply h_len = 99 h = np.ones(h_len) h /= np.linalg.norm(h) # parameters block_size = 512 - h_len + 1 # make sure the FFT size is a power of 2 hop = block_size // 2 # half overlap window = pra.hann(block_size, flag="asymmetric", length="full") # analysis window (no synthesis window) # open single channel audio file fn = os.path.join(os.path.dirname(__file__), "input_samples", "singing_8000.wav") fs, audio = wavfile.read(fn) # Create the STFT object stft = pra.transform.STFT(block_size, hop=hop, analysis_window=window, channels=1, streaming=True) # set the filter and the appropriate amount of zero padding (back) if h_len > 1:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.linalg import toeplitz
from scipy.io import wavfile
from scipy.signal import resample, fftconvolve

import pyroomacoustics as pra
import TDBeamformers as tdb

# Spectrogram figure properties
figsize = (15, 7)  # figure size
fft_size = 512  # fft size for analysis
fft_hop = 8  # hop between analysis frame
fft_zp = 512  # zero padding
# Hann window of fft_size samples followed by fft_zp zeros
analysis_window = np.concatenate((pra.hann(fft_size), np.zeros(fft_zp)))
t_cut = 0.83  # length in [s] to remove at end of signal (no sound)

# Some simulation parameters
Fs = 8000  # presumably the sampling frequency in Hz - TODO confirm
t0 = 1. / (Fs * np.pi * 1e-2 )  # starting time function of sinc decay in RIR response
absorption = 0.90
max_order_sim = 10
sigma2_n = 5e-7  # presumably the noise variance - TODO confirm against its use

# Room 1 : Shoe box
room_dim = [4, 6]

# the good source is fixed for all
good_source = np.array([1, 4.5])  # good source
def with_half_overlap_with_filter(D, num_frames=1, fixed_memory=False, streaming=True):
    """
    Filter the test signal through a half-overlap STFT and measure the error.

    The module-level signal ``x`` is filtered with ``h`` in the STFT domain
    and the output is compared with the module-level reference ``y``.

    D - number of channels
    num_frames - how many frames are processed per call; it is used in
        arithmetic below, so it has to be a number
    fixed_memory - when true, ``num_frames`` is also given to the STFT
        constructor (real-time consideration)
    streaming - whether or not to stitch between frames

    Returns the maximum absolute difference between reference and output.
    """
    # a single channel is handled as a 1D array
    channel_sel = 0 if D == 1 else slice(None, D)
    x_local = x[:, channel_sel]
    y_local = y[:, channel_sel]
    h_local = h[:, channel_sel]

    # parameters
    block_size = 512 - h_len + 1  # make sure the FFT size is a power of 2
    hop = block_size // 2  # half overlap
    window = pra.hann(block_size)  # the analysis window

    if not streaming:
        # keep exactly the samples covered by num_frames frames
        x_local = x_local[:(num_frames - 1) * hop + block_size, ]

    # Create the STFT object
    stft_kwargs = dict(
        hop=hop,
        channels=D,
        transform=transform,
        analysis_window=window,
        streaming=streaming,
    )
    if fixed_memory:
        stft_kwargs["num_frames"] = num_frames
    stft = STFT(block_size, **stft_kwargs)

    # setup the filter
    stft.set_filter(h_local, zb=h_len - 1)

    if not streaming:
        # one shot: process everything at once and ignore one block at each end
        stft.analysis(x_local)
        stft.process()
        output = stft.synthesis()
        last = output.shape[0] - block_size
        return np.max(np.abs(y_local[block_size:last, ] - output[block_size:last, ]))

    # streaming: collect the processed chunks one after the other
    output = np.zeros(x_local.shape)
    chunk = hop * num_frames
    pos = 0
    # process the signals while full chunks are available
    while pos + chunk < x_local.shape[0]:
        stft.analysis(x_local[pos:pos + chunk, ])
        stft.process()  # apply the filter
        output[pos:pos + chunk, ] = stft.synthesis()
        pos += chunk

    # the output lags the reference by one hop
    return np.max(np.abs(y_local[:pos - hop, ] - output[hop:pos, ]))
noise_fp = os.path.join(os.path.dirname(__file__), "input_samples", "doing_the_dishes.wav") noisy_signal, signal, noise, fs = pra.create_noisy_signal(signal_fp, snr=snr, noise_fp=noise_fp) wavfile.write( os.path.join(os.path.dirname(__file__), "output_samples", "denoise_input_SpectralSub.wav"), fs, noisy_signal.astype(np.float32), ) """ Create STFT and SCNR objects """ hop = nfft // 2 window_a = pra.hann(nfft) window_s = pra.transform.stft.compute_synthesis_window(window_a, hop) stft = pra.transform.STFT(nfft, hop=hop, analysis_window=window_a, synthesis_window=window_s, streaming=True) scnr = SpectralSub(nfft, db_reduc, lookback, beta, alpha) lookback_time = hop / fs * lookback print("Lookback : %f seconds" % lookback_time) """ Process as in real-time """ # collect the processed blocks processed_audio = np.zeros(signal.shape)
) # Read in the pyramic microphone locations with open('pyramic.json') as f: data = json.load(f) array = np.array(data['pyramic']).T # Position the array in the room array -= array.mean(axis=1, keepdims=True) array += np.array([[5.5, 5.3, 1.1]]).T room.add_microphone_array(pra.MicrophoneArray(array, room.fs)) #################### # Prepare the STFT # awin = pra.hann(nfft) swin = pra.transform.compute_synthesis_window(awin, shift) stft_input = pra.transform.STFT( nfft, shift, analysis_window=awin, synthesis_window=swin, channels=array.shape[1], ) stft_output = pra.transform.STFT( nfft, shift, analysis_window=awin, synthesis_window=swin, channels=1, )
noisy_signal, signal, noise, fs = pra.create_noisy_signal(signal_fp, snr=snr, noise_fp=noise_fp) wavfile.write( os.path.join(os.path.dirname(__file__), 'output_samples', 'denoise_input_IterativeWiener.wav'), fs, noisy_signal.astype(np.float32)) """ Apply approach """ scnr = IterativeWiener(frame_len, lpc_order, iterations, alpha, threshold) # derived parameters hop = frame_len // 2 win = pra.hann(frame_len, flag='asymmetric', length='full') stft = pra.transform.STFT(frame_len, hop=hop, analysis_window=win, streaming=True) speech_psd = np.ones(hop + 1) # initialize PSD noise_psd = 0 start_time = time.time() processed_audio = np.zeros(noisy_signal.shape) n = 0 while noisy_signal.shape[0] - n >= hop: # to frequency domain, 50% overlap stft.analysis(noisy_signal[n:(n + hop), ])
parser.add_argument('--save', action='store_true', help='Saves the output of the separation to wav files') args = parser.parse_args() if args.gui: # avoids a bug with tkinter and matplotlib import matplotlib matplotlib.use('TkAgg') import pyroomacoustics as pra ## Prepare one-shot STFT L = args.block hop = L // 2 win_a = pra.hann(L) win_s = pra.transform.compute_synthesis_window(win_a, hop) ## Create a room with sources and mics # Room dimensions in meters room_dim = [8, 9] # source location source = np.array([1, 4.5]) room = pra.ShoeBox(room_dim, fs=16000, max_order=15, absorption=0.35, sigma2_awgn=1e-8) # get signals
def convergence_callback(
    Y,
    source_model,
    X,
    n_targets,
    SDR,
    SIR,
    cost_list,
    eval_time,
    ref_sig,
    ref_mic,
    stft_params,
    algo_name,
    algo_is_overdetermined,
):
    """
    Record the cost function, SDR and SIR for the current separation output.

    The STFT-domain output ``Y`` is projected back onto microphone
    ``ref_mic`` of ``X``, brought to the time domain and compared with
    ``ref_sig`` using ``si_bss_eval``. The results are appended to the lists
    given as arguments:

    - ``cost_list`` receives the value of ``bss.cost_iva(X, Y, ...)``
    - ``SDR`` and ``SIR`` receive the values of the first ``n_targets`` sources
    - ``eval_time`` receives the time spent in this routine

    ``stft_params`` must provide the keys ``"framesize"``, ``"hop"`` and
    ``"window"``. Nothing is returned.
    """
    global id_wav

    # the duration of the whole routine is measured
    tick = time.perf_counter()

    # current value of the IVA cost function
    cost_list.append(bss.cost_iva(X, Y, model=source_model))

    # STFT parameters; anything other than "hamming" falls back to Hann
    framesize = stft_params["framesize"]
    hop = stft_params["hop"]
    make_window = pra.hamming if stft_params["window"] == "hamming" else pra.hann
    win_s = pra.transform.compute_synthesis_window(make_window(framesize), hop)

    # projection back
    Y = bss.project_back(Y, X[:, :, ref_mic])

    # back to the time domain, always with one column per output channel
    if Y.shape[2] == 1:
        time_sig = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
    else:
        time_sig = pra.transform.synthesis(Y, framesize, hop, win=win_s)
    time_sig = time_sig[framesize - hop:, :].astype(np.float64)

    if not algo_is_overdetermined:
        # sort the channels by decreasing standard deviation
        by_power = np.argsort(np.std(time_sig, axis=0))[::-1]
        time_sig = time_sig[:, by_power]

    n_keep = np.minimum(time_sig.shape[0], ref_sig.shape[1])

    estimate = np.zeros_like(ref_sig)
    # in the overdetermined case, we also take into account the background for SIR computation
    estimate[:n_targets, :n_keep] = time_sig[:n_keep, :n_targets].T
    if estimate.shape[0] > time_sig.shape[1]:
        # here we copy the first source to fill the channel of the background
        estimate[n_targets, :n_keep] = time_sig[:n_keep, 0]

    # an extra reference channel that is (numerically) zero is left out
    skip_background = (
        ref_sig.shape[0] > n_targets
        and np.sum(np.abs(ref_sig[n_targets, :])) < 1e-10
    )
    if skip_background:
        ref_eval = ref_sig[:n_targets, :n_keep].T
        est_eval = estimate[:-1, :n_keep].T
    else:
        ref_eval = ref_sig[:, :n_keep].T
        est_eval = estimate[:, :n_keep].T
    sdr, sir, sar, perm = si_bss_eval(ref_eval, est_eval)

    SDR.append(sdr[:n_targets].tolist())
    SIR.append(sir[:n_targets].tolist())

    eval_time.append(time.perf_counter() - tick)
# creating a noisy_signal array for each snr value and mic speech_file_location = speech.meta.as_dict()['file_loc'] noise_file_location = noise.meta.as_dict()['file_loc'] noisy_signal = utils.modify_input_wav_multiple_mics( speech_file_location, noise_file_location, room_dim, max_order, snr_vals, mic_array, [2, 3.1, 2], [4, 2, 1.5]) # Create our new samples for each SNR values noisy_single_mic = noisy_signal[:, 0, :] ''' make an STFT object (these class are already implemented in Pyroomacoustics and have example showing how to use them) ''' hop = fft_len // 2 window = pra.hann(fft_len, flag='asymmetric', length='full') stft = pra.realtime.STFT(fft_len, hop=hop, analysis_window=window, channels=1) ''' Processing of our noisy signals contained in the noisy array. ''' # collect the processed block for each of our noisy signal processed_audio_array = np.zeros(noisy_single_mic.shape) # we run the algorithm for each of our possible signal for i, snr in enumerate(snr_vals): n = 0
fs = 44100 room = pra.ShoeBox(room_size, fs, materials=pra.Material(0.1), max_order = 50) room.add_source(source_loc) room.add_microphone_array(pra.MicrophoneArray(mic_loc, fs)) room.compute_rir() #plot spectrograms to check for sweeping echoes fft_size = 512 # fft size for analysis fft_hop = 128 # hop between analysis frame fft_zp = 512 # zero padding analysis_window = pra.hann(fft_size) print("Sweeping echo measure for ISM is :") for n in range(M): if n == 0: S = stft.analysis(room.rir[n][0], fft_size, fft_hop, win=analysis_window, zp_back=fft_zp) f, (ax1, ax2) = plt.subplots(2,1) ax1.imshow( pra.dB(S.T), extent=[0, len(room.rir[n][0]), 0, fs / 2], vmin=-100, vmax=0, origin="lower",