# NOTE(review): script fragment -- `shape`, `Fs`, `mic1`, `M`, `phi`, `d`,
# `room_dim`, `t0`, `max_order_sim` and the `bf`/`u`/`rg`/`np`/`wavfile`
# names are defined/imported elsewhere in the file.

# build the microphone array: circular (circumference M*d) or linear (spacing d)
if shape is 'Circular':  # NOTE(review): identity test on a str literal -- `==` intended; verify
    mics = bf.Beamformer.circular2D(Fs, mic1, M, phi, d * M / (2 * np.pi))
else:
    mics = bf.Beamformer.linear2D(Fs, mic1, M, phi, d)

# define the array processing type
L = 4096     # frame length
hop = 2048   # hop between frames
zp = 2048    # zero padding (front + back)
mics.setProcessing('FrequencyDomain', L, hop, zp, zp)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_' + str(Fs) + '.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = u.normalize(signal1)
signal1 = u.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_' + str(Fs) + '.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = u.normalize(signal2)
signal2 = u.highpass(signal2, Fs)
delay2 = 1.

# create the room with sources and mics
# NOTE(review): this call is truncated in this chunk -- its remaining arguments follow below
room1 = rg.Room.shoeBox2D([0, 0], room_dim, Fs, t0=t0, max_order=max_order_sim,
# NOTE(review): near-duplicate of the fragment above -- `shape`, `Fs`, `mic1`,
# `M`, `phi`, `d`, `room_dim`, `t0` and the `bf`/`u`/`rg`/`np`/`wavfile`
# names are defined/imported elsewhere in the file.

# build the microphone array: circular (circumference M*d) or linear (spacing d)
if shape is 'Circular':  # NOTE(review): identity test on a str literal -- `==` intended; verify
    mics = bf.Beamformer.circular2D(Fs, mic1, M, phi, d*M/(2*np.pi))
else:
    mics = bf.Beamformer.linear2D(Fs, mic1, M, phi, d)

# define the array processing type
L = 4096     # frame length
hop = 2048   # hop between frames
zp = 2048    # zero padding (front + back)
mics.setProcessing('FrequencyDomain', L, hop, zp, zp)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_'+str(Fs)+'.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = u.normalize(signal1)
signal1 = u.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_'+str(Fs)+'.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = u.normalize(signal2)
signal2 = u.highpass(signal2, Fs)
delay2 = 1.

# create the room with sources and mics
# NOTE(review): this call is truncated in this chunk -- its remaining arguments follow below
room1 = rg.Room.shoeBox2D( [0,0], room_dim, Fs, t0 = t0,
def perceptual_quality_evaluation(good_source, bad_source):
    '''
    Perceptual quality evaluation simulation -- inner loop.

    Simulates one (good_source, bad_source) placement in a 2D shoe-box room,
    records the scene at a reference mic and at an 8-mic array, separates the
    sources with Trinicon and with three rake beamformers, and scores every
    output with PESQ and SINR against a free-field, noiseless reference.

    Parameters
    ----------
    good_source : array-like of length 2
        Position [x, y] of the desired source in the room.
    bad_source : array-like of length 2
        Position [x, y] of the interfering source in the room.

    Returns
    -------
    tuple
        (pesq_input, pesq_trinicon, pesq_bf, isinr, osinr_trinicon, osinr_bf)
        where pesq_bf has shape (2, NBF, S) and osinr_bf shape (NBF, S), with
        NBF the number of beamformers and S the number of image-source counts.
    '''
    # Imports are done in the function so that it can be easily parallelized
    # (unused `scipy.signal.resample` import removed)
    import numpy as np
    from scipy.io import wavfile
    from os import getpid

    from Room import Room
    from beamforming import Beamformer, MicrophoneArray
    from trinicon import trinicon

    from utilities import normalize, to_16b, highpass
    from phat import time_align
    from metrics import snr, pesq

    # numbers of image sources to feed the rake beamformers
    n_sources = np.arange(1, 12)
    S = n_sources.shape[0]

    # the speech samples used
    speech_sample1 = 'samples/fq_sample1_8000.wav'
    speech_sample2 = 'samples/fq_sample2_8000.wav'

    # Some simulation parameters
    Fs = 8000
    t0 = 1. / (Fs * np.pi * 1e-2)  # starting time function of sinc decay in RIR response
    absorption = 0.90
    max_order_sim = 10
    SNR_at_mic = 20  # SNR at center of microphone array in dB

    # Room 1 : Shoe box
    room_dim = [4, 6]

    # microphone array design parameters
    mic1 = [2, 1.5]   # position
    M = 8             # number of microphones
    d = 0.08          # distance between microphones
    phi = 0.          # angle from horizontal
    shape = 'Linear'  # array shape

    # create a microphone array
    # BUGFIX: was `shape is 'Circular'` -- identity test on a string literal;
    # use equality so the branch does not depend on string interning.
    if shape == 'Circular':
        mics = Beamformer.circular2D(Fs, mic1, M, phi, d * M / (2 * np.pi))
    else:
        mics = Beamformer.linear2D(Fs, mic1, M, phi, d)

    # create a single reference mic at center of array
    ref_mic = MicrophoneArray(mics.center, Fs)

    # define the array processing type
    L = 4096    # frame length
    hop = 2048  # hop between frames
    zp = 2048   # zero padding (front + back)
    mics.setProcessing('FrequencyDomain', L, hop, zp, zp)

    # beamformers under test and their weight-computation methods (parallel lists)
    beamformer_names = ['Rake-DS', 'Rake-MaxSINR', 'Rake-MaxUDR']
    bf_weights_fun = [mics.rakeDelayAndSumWeights,
                      mics.rakeMaxSINRWeights,
                      mics.rakeMaxUDRWeights]
    NBF = len(beamformer_names)

    # receptacle arrays (dead stores `isinr = 0` / zeroed pesq_trinicon removed)
    pesq_input = np.zeros(2)
    pesq_bf = np.zeros((2, NBF, S))
    osinr_trinicon = np.zeros(2)
    osinr_bf = np.zeros((NBF, S))

    # since we run multiple workers, we need to uniquely identify filenames
    # (xrange -> range: identical iteration here, Python 3 compatible)
    pid = str(getpid())
    file_ref = 'output_samples/fqref' + pid + '.wav'
    file_suffix = '-' + pid + '.wav'
    files_tri = ['output_samples/fqt' + str(i + 1) + file_suffix for i in range(2)]
    files_bf = ['output_samples/fq' + str(i + 1) + file_suffix for i in range(NBF)]
    file_raw = 'output_samples/fqraw' + pid + '.wav'

    # Read the two speech samples used
    rate, good_signal = wavfile.read(speech_sample1)
    good_signal = np.array(good_signal, dtype=float)
    good_signal = normalize(good_signal)
    good_signal = highpass(good_signal, rate)
    good_len = good_signal.shape[0] / float(Fs)  # duration in seconds

    rate, bad_signal = wavfile.read(speech_sample2)
    bad_signal = np.array(bad_signal, dtype=float)
    bad_signal = normalize(bad_signal)
    bad_signal = highpass(bad_signal, rate)
    bad_len = bad_signal.shape[0] / float(Fs)  # duration in seconds

    # variance of good signal
    good_sigma2 = np.mean(good_signal ** 2)

    # normalize interference signal to have equal power with desired signal
    bad_signal *= good_sigma2 / np.mean(bad_signal ** 2)

    # distance from the array center to the desired source
    # (unused `bad_distance` removed)
    good_distance = np.linalg.norm(mics.center[:, 0] - np.array(good_source))

    # delay the shorter signal so both are centered on the same time span
    if good_len > bad_len:
        good_delay = 0
        bad_delay = (good_len - bad_len) / 2.
    else:
        bad_delay = 0
        good_delay = (bad_len - good_len) / 2.

    # noise variance at center of array wrt good signal and SNR
    # (free-field 1/(4*pi*r) propagation, SNR_at_mic given in dB)
    sigma2_n = good_sigma2 / (4 * np.pi * good_distance) ** 2 / 10 ** (SNR_at_mic / 10)

    # reference room: free field (max_order=0), noiseless, no interference
    ref_room = Room.shoeBox2D([0, 0], room_dim, Fs,
                              t0=t0, max_order=0,
                              absorption=absorption, sigma2_awgn=0.)
    ref_room.addSource(good_source, signal=good_signal, delay=good_delay)
    ref_room.addMicrophoneArray(ref_mic)
    ref_room.compute_RIR()
    ref_room.simulate()
    reference_n = normalize(ref_mic.signals[0])

    # save the reference desired signal
    wavfile.write(file_ref, Fs, to_16b(reference_n))

    # create the 'real' room with reverberation, noise and both sources
    room1 = Room.shoeBox2D([0, 0], room_dim, Fs,
                           t0=t0, max_order=max_order_sim,
                           absorption=absorption, sigma2_awgn=sigma2_n)
    room1.addSource(good_source, signal=good_signal, delay=good_delay)
    room1.addSource(bad_source, signal=bad_signal, delay=bad_delay)

    # Record first the degraded signal at reference mic (center of array)
    room1.addMicrophoneArray(ref_mic)
    room1.compute_RIR()
    room1.simulate()
    raw_n = normalize(highpass(ref_mic.signals[0], Fs))

    # save degraded reference signal
    wavfile.write(file_raw, Fs, to_16b(raw_n))

    # Compute PESQ and SINR of raw degraded reference signal
    isinr = snr(reference_n, raw_n[:reference_n.shape[0]])
    pesq_input[:] = pesq(file_ref, file_raw, Fs=Fs).T

    # Now record input of microphone array
    room1.addMicrophoneArray(mics)
    room1.compute_RIR()
    room1.simulate()

    # Run the Trinicon algorithm on the signals tiled three times
    # (lets the adaptive filters converge); keep only the last period
    double_sig = mics.signals.copy()
    for _ in range(2):
        double_sig = np.concatenate((double_sig, mics.signals), axis=1)
    sig_len = mics.signals.shape[1]
    output_trinicon = trinicon(double_sig)[:, -sig_len:]

    # normalize, time-align and save both Trinicon outputs
    output_tri1 = normalize(highpass(output_trinicon[0, :], Fs))
    output_tri1 = time_align(reference_n, output_tri1)
    wavfile.write(files_tri[0], Fs, to_16b(output_tri1))
    output_tri2 = normalize(highpass(output_trinicon[1, :], Fs))
    output_tri2 = time_align(reference_n, output_tri2)
    wavfile.write(files_tri[1], Fs, to_16b(output_tri2))

    # Measure PESQ and SINR for both output signals, we'll sort out later
    pesq_trinicon = pesq(file_ref, files_tri, Fs=Fs)
    osinr_trinicon[0] = snr(reference_n, output_tri1)
    osinr_trinicon[1] = snr(reference_n, output_tri2)

    # Run all the beamformers for each number of image sources considered
    for k, s in enumerate(n_sources):
        # Extract image source locations and create noise covariance matrix
        good_sources = room1.sources[0].getImages(n_nearest=s, ref_point=mics.center)
        bad_sources = room1.sources[1].getImages(n_nearest=s, ref_point=mics.center)
        Rn = sigma2_n * np.eye(mics.M)

        for i, bf_weights in enumerate(bf_weights_fun):
            # compute the beamforming weights
            # BUGFIX: reuse the precomputed Rn (was recomputed inline while Rn sat unused)
            bf_weights(good_sources, bad_sources, R_n=Rn, attn=True, ff=False)

            output = mics.process()
            output = normalize(highpass(output, Fs))
            output = time_align(reference_n, output)

            # save file for PESQ evaluation
            wavfile.write(files_bf[i], Fs, to_16b(output))

            # compute output SINR and PESQ
            osinr_bf[i, k] = snr(reference_n, output)
            pesq_bf[:, i, k] = pesq(file_ref, files_bf[i], Fs=Fs).T

    return pesq_input, pesq_trinicon, pesq_bf, isinr, osinr_trinicon, osinr_bf
# NOTE(review): script fragment -- `shape`, `Fs`, `mic1`, `M`, `phi`, `d`,
# `good_source`, `SNR_at_mic` and the `bf`/`u`/`np`/`wavfile` names are
# defined/imported elsewhere in the file.

# build the microphone array: circular (circumference M*d) or linear (spacing d)
if shape is 'Circular':  # NOTE(review): identity test on a str literal -- `==` intended; verify
    mics = bf.Beamformer.circular2D(Fs, mic1, M, phi, d*M/(2*np.pi))
else:
    mics = bf.Beamformer.linear2D(Fs, mic1, M, phi, d)

# define the array processing type
L = 4096     # frame length
hop = 2048   # hop between frames
zp = 2048    # zero padding (front + back)
mics.setProcessing('FrequencyDomain', L, hop, zp, zp)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_'+str(Fs)+'.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = u.normalize(signal1)
signal1 = u.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_'+str(Fs)+'.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = u.normalize(signal2)
signal2 = u.highpass(signal2, Fs)
delay2 = 1.

# compute the noise variance at center of array wrt signal1 and SNR
# (free-field 1/(4*pi*r) propagation to the array center, SNR_at_mic in dB)
sigma2_signal1 = np.mean(signal1**2)
distance = np.linalg.norm(mics.center[:,0] - np.array(good_source))
sigma2_n = sigma2_signal1/(4*np.pi*distance)**2/10**(SNR_at_mic/10)

# create the room with sources and mics
# NOTE(review): near-duplicate of the fragment above -- `shape`, `Fs`, `mic1`,
# `M`, `phi`, `d`, `good_source`, `SNR_at_mic` and the `bf`/`u`/`np`/`wavfile`
# names are defined/imported elsewhere in the file.

# build the microphone array: circular (circumference M*d) or linear (spacing d)
if shape is 'Circular':  # NOTE(review): identity test on a str literal -- `==` intended; verify
    mics = bf.Beamformer.circular2D(Fs, mic1, M, phi, d * M / (2 * np.pi))
else:
    mics = bf.Beamformer.linear2D(Fs, mic1, M, phi, d)

# define the array processing type
L = 4096     # frame length
hop = 2048   # hop between frames
zp = 2048    # zero padding (front + back)
mics.setProcessing('FrequencyDomain', L, hop, zp, zp)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_' + str(Fs) + '.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = u.normalize(signal1)
signal1 = u.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_' + str(Fs) + '.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = u.normalize(signal2)
signal2 = u.highpass(signal2, Fs)
delay2 = 1.

# compute the noise variance at center of array wrt signal1 and SNR
# (free-field 1/(4*pi*r) propagation to the array center, SNR_at_mic in dB)
sigma2_signal1 = np.mean(signal1**2)
distance = np.linalg.norm(mics.center[:, 0] - np.array(good_source))
sigma2_n = sigma2_signal1 / (4 * np.pi * distance)**2 / 10**(SNR_at_mic / 10)

# create the room with sources and mics
def perceptual_quality_evaluation(good_source, bad_source):
    '''
    Perceptual quality evaluation simulation -- inner loop.

    Simulates one (good_source, bad_source) placement in a 2D shoe-box room,
    records the scene at a reference mic and at an 8-mic array, separates the
    sources with Trinicon and with three rake beamformers, and scores every
    output with PESQ and SINR against a free-field, noiseless reference.

    Parameters
    ----------
    good_source : array-like of length 2
        Position [x, y] of the desired source in the room.
    bad_source : array-like of length 2
        Position [x, y] of the interfering source in the room.

    Returns
    -------
    tuple
        (pesq_input, pesq_trinicon, pesq_bf, isinr, osinr_trinicon, osinr_bf)
        where pesq_bf has shape (2, NBF, S) and osinr_bf shape (NBF, S), with
        NBF the number of beamformers and S the number of image-source counts.
    '''
    # Imports are done in the function so that it can be easily parallelized
    # (unused `scipy.signal.resample` import removed)
    import numpy as np
    from scipy.io import wavfile
    from os import getpid

    from Room import Room
    from beamforming import Beamformer, MicrophoneArray
    from trinicon import trinicon

    from utilities import normalize, to_16b, highpass
    from phat import time_align
    from metrics import snr, pesq

    # numbers of image sources to feed the rake beamformers
    n_sources = np.arange(1, 12)
    S = n_sources.shape[0]

    # the speech samples used
    speech_sample1 = 'samples/fq_sample1_8000.wav'
    speech_sample2 = 'samples/fq_sample2_8000.wav'

    # Some simulation parameters
    Fs = 8000
    t0 = 1. / (Fs * np.pi * 1e-2)  # starting time function of sinc decay in RIR response
    absorption = 0.90
    max_order_sim = 10
    SNR_at_mic = 20  # SNR at center of microphone array in dB

    # Room 1 : Shoe box
    room_dim = [4, 6]

    # microphone array design parameters
    mic1 = [2, 1.5]   # position
    M = 8             # number of microphones
    d = 0.08          # distance between microphones
    phi = 0.          # angle from horizontal
    shape = 'Linear'  # array shape

    # create a microphone array
    # BUGFIX: was `shape is 'Circular'` -- identity test on a string literal;
    # use equality so the branch does not depend on string interning.
    if shape == 'Circular':
        mics = Beamformer.circular2D(Fs, mic1, M, phi, d * M / (2 * np.pi))
    else:
        mics = Beamformer.linear2D(Fs, mic1, M, phi, d)

    # create a single reference mic at center of array
    ref_mic = MicrophoneArray(mics.center, Fs)

    # define the array processing type
    L = 4096    # frame length
    hop = 2048  # hop between frames
    zp = 2048   # zero padding (front + back)
    mics.setProcessing('FrequencyDomain', L, hop, zp, zp)

    # beamformers under test and their weight-computation methods (parallel lists)
    beamformer_names = ['Rake-DS', 'Rake-MaxSINR', 'Rake-MaxUDR']
    bf_weights_fun = [mics.rakeDelayAndSumWeights,
                      mics.rakeMaxSINRWeights,
                      mics.rakeMaxUDRWeights]
    NBF = len(beamformer_names)

    # receptacle arrays (dead stores `isinr = 0` / zeroed pesq_trinicon removed)
    pesq_input = np.zeros(2)
    pesq_bf = np.zeros((2, NBF, S))
    osinr_trinicon = np.zeros(2)
    osinr_bf = np.zeros((NBF, S))

    # since we run multiple workers, we need to uniquely identify filenames
    # (xrange -> range: identical iteration here, Python 3 compatible)
    pid = str(getpid())
    file_ref = 'output_samples/fqref' + pid + '.wav'
    file_suffix = '-' + pid + '.wav'
    files_tri = ['output_samples/fqt' + str(i + 1) + file_suffix for i in range(2)]
    files_bf = ['output_samples/fq' + str(i + 1) + file_suffix for i in range(NBF)]
    file_raw = 'output_samples/fqraw' + pid + '.wav'

    # Read the two speech samples used
    rate, good_signal = wavfile.read(speech_sample1)
    good_signal = np.array(good_signal, dtype=float)
    good_signal = normalize(good_signal)
    good_signal = highpass(good_signal, rate)
    good_len = good_signal.shape[0] / float(Fs)  # duration in seconds

    rate, bad_signal = wavfile.read(speech_sample2)
    bad_signal = np.array(bad_signal, dtype=float)
    bad_signal = normalize(bad_signal)
    bad_signal = highpass(bad_signal, rate)
    bad_len = bad_signal.shape[0] / float(Fs)  # duration in seconds

    # variance of good signal
    good_sigma2 = np.mean(good_signal ** 2)

    # normalize interference signal to have equal power with desired signal
    bad_signal *= good_sigma2 / np.mean(bad_signal ** 2)

    # distance from the array center to the desired source
    # (unused `bad_distance` removed)
    good_distance = np.linalg.norm(mics.center[:, 0] - np.array(good_source))

    # delay the shorter signal so both are centered on the same time span
    if good_len > bad_len:
        good_delay = 0
        bad_delay = (good_len - bad_len) / 2.
    else:
        bad_delay = 0
        good_delay = (bad_len - good_len) / 2.

    # noise variance at center of array wrt good signal and SNR
    # (free-field 1/(4*pi*r) propagation, SNR_at_mic given in dB)
    sigma2_n = good_sigma2 / (4 * np.pi * good_distance) ** 2 / 10 ** (SNR_at_mic / 10)

    # reference room: free field (max_order=0), noiseless, no interference
    ref_room = Room.shoeBox2D([0, 0], room_dim, Fs,
                              t0=t0, max_order=0,
                              absorption=absorption, sigma2_awgn=0.)
    ref_room.addSource(good_source, signal=good_signal, delay=good_delay)
    ref_room.addMicrophoneArray(ref_mic)
    ref_room.compute_RIR()
    ref_room.simulate()
    reference_n = normalize(ref_mic.signals[0])

    # save the reference desired signal
    wavfile.write(file_ref, Fs, to_16b(reference_n))

    # create the 'real' room with reverberation, noise and both sources
    room1 = Room.shoeBox2D([0, 0], room_dim, Fs,
                           t0=t0, max_order=max_order_sim,
                           absorption=absorption, sigma2_awgn=sigma2_n)
    room1.addSource(good_source, signal=good_signal, delay=good_delay)
    room1.addSource(bad_source, signal=bad_signal, delay=bad_delay)

    # Record first the degraded signal at reference mic (center of array)
    room1.addMicrophoneArray(ref_mic)
    room1.compute_RIR()
    room1.simulate()
    raw_n = normalize(highpass(ref_mic.signals[0], Fs))

    # save degraded reference signal
    wavfile.write(file_raw, Fs, to_16b(raw_n))

    # Compute PESQ and SINR of raw degraded reference signal
    isinr = snr(reference_n, raw_n[:reference_n.shape[0]])
    pesq_input[:] = pesq(file_ref, file_raw, Fs=Fs).T

    # Now record input of microphone array
    room1.addMicrophoneArray(mics)
    room1.compute_RIR()
    room1.simulate()

    # Run the Trinicon algorithm on the signals tiled three times
    # (lets the adaptive filters converge); keep only the last period
    double_sig = mics.signals.copy()
    for _ in range(2):
        double_sig = np.concatenate((double_sig, mics.signals), axis=1)
    sig_len = mics.signals.shape[1]
    output_trinicon = trinicon(double_sig)[:, -sig_len:]

    # normalize, time-align and save both Trinicon outputs
    output_tri1 = normalize(highpass(output_trinicon[0, :], Fs))
    output_tri1 = time_align(reference_n, output_tri1)
    wavfile.write(files_tri[0], Fs, to_16b(output_tri1))
    output_tri2 = normalize(highpass(output_trinicon[1, :], Fs))
    output_tri2 = time_align(reference_n, output_tri2)
    wavfile.write(files_tri[1], Fs, to_16b(output_tri2))

    # Measure PESQ and SINR for both output signals, we'll sort out later
    pesq_trinicon = pesq(file_ref, files_tri, Fs=Fs)
    osinr_trinicon[0] = snr(reference_n, output_tri1)
    osinr_trinicon[1] = snr(reference_n, output_tri2)

    # Run all the beamformers for each number of image sources considered
    for k, s in enumerate(n_sources):
        # Extract image source locations and create noise covariance matrix
        good_sources = room1.sources[0].getImages(n_nearest=s, ref_point=mics.center)
        bad_sources = room1.sources[1].getImages(n_nearest=s, ref_point=mics.center)
        Rn = sigma2_n * np.eye(mics.M)

        for i, bf_weights in enumerate(bf_weights_fun):
            # compute the beamforming weights
            # BUGFIX: reuse the precomputed Rn (was recomputed inline while Rn sat unused)
            bf_weights(good_sources, bad_sources, R_n=Rn, attn=True, ff=False)

            output = mics.process()
            output = normalize(highpass(output, Fs))
            output = time_align(reference_n, output)

            # save file for PESQ evaluation
            wavfile.write(files_bf[i], Fs, to_16b(output))

            # compute output SINR and PESQ
            osinr_bf[i, k] = snr(reference_n, output)
            pesq_bf[:, i, k] = pesq(file_ref, files_bf[i], Fs=Fs).T

    return pesq_input, pesq_trinicon, pesq_bf, isinr, osinr_trinicon, osinr_bf