def save_main_beam(self, rs_x, rs_y, sp_x, sp_y, is_circular, mic_num, c_x, c_y,
                   i_m_d, angle, room_frequency, freques, room_temp=None,
                   room_humidity=None, is_airAbsorption=None):
    """Plot a delay-and-sum beam pattern in a 2D shoe-box room and save it.

    Args:
        rs_x, rs_y: Room dimensions passed to ``pra.ShoeBox``.
        sp_x, sp_y: Source position.
        is_circular: Truthy selects ``pra.circular_2D_array``, otherwise
            ``pra.linear_2D_array``.
        mic_num: Number of microphones.
        c_x, c_y: Centre of the microphone array.
        i_m_d: Fourth positional argument of the array builder (radius for
            the circular layout, inter-microphone distance for the linear one).
        angle: Third positional argument of the array builder.
        room_frequency: Sampling frequency of the room.
        freques: Forwarded as ``freq`` to ``room.plot``.
        room_temp, room_humidity, is_airAbsorption: Forwarded to
            ``pra.ShoeBox`` as ``temperature``, ``humidity`` and
            ``air_absorption``.

    The figure is written to ``./fig.png``; nothing is returned.
    """
    room = pra.ShoeBox(
        [rs_x, rs_y],
        fs=room_frequency,
        temperature=room_temp,
        humidity=room_humidity,
        air_absorption=is_airAbsorption,
    )
    room.add_source([sp_x, sp_y])

    # Pick the geometry builder; both share the same positional signature.
    build_array = pra.circular_2D_array if is_circular else pra.linear_2D_array
    mic_positions = build_array([c_x, c_y], mic_num, angle, i_m_d)

    beamformer = pra.Beamformer(mic_positions, room.fs)
    room.add_microphone_array(beamformer)

    # Delay-and-sum weights steered at the first source, restricted to its
    # first entry (`[:1]`).
    steering_target = room.sources[0][:1]
    room.mic_array.rake_delay_and_sum_weights(steering_target)

    # Draw the room with the resulting beam pattern and save it to disk.
    room.plot(freq=freques, img_order=0)
    plt.savefig('./fig.png')
def main_doa(self, az, di, S, c1, f, n, fbm, fbM, r_x, r_y, room_temp,
             room_humidity, is_airAbsorption, num, angle, raduis, algos, save):
    """Simulate one source in an anechoic room and run DOA algorithms on it.

    Args:
        az: True source azimuth in degrees.
        di: Distance of the source from the room centre.
        S: Signal-to-noise ratio used to derive the noise variance.
        c1: Speed of sound handed to the DOA objects.
        f: Sampling frequency.
        n: FFT size.
        fbm, fbM: Bounds of ``np.arange(fbm, fbM)``, the FFT bins used.
        r_x, r_y: Room dimensions.
        room_temp, room_humidity, is_airAbsorption: Forwarded to
            ``pra.ShoeBox``.
        num, angle, raduis: Microphone count, start angle and radius of the
            circular array centred in the room.
        algos: Iterable of keys of ``pra.doa.algorithms``.
        save: If truthy each polar plot is saved as ``./<algo>.png``;
            otherwise ``plt.show()`` is called once after all algorithms ran.

    For the algorithms CSSM, MUSIC, SRP, TOPS and WAVES the recovered azimuth
    and the circular error (both in degrees) are written to GUI labels.
    """
    # algorithm name -> (recovered-azimuth label, error label); resolved
    # lazily with getattr so only the matching labels are ever touched.
    result_labels = {
        "CSSM": ("label_62", "label_68"),
        "MUSIC": ("label_56", "label_66"),
        "SRP": ("label_61", "label_67"),
        "TOPS": ("label_64", "label_70"),
        "WAVES": ("label_63", "label_69"),
    }

    azimuth = az / 180.0 * np.pi
    freq_bins = np.arange(fbm, fbM)

    # Noise variance derived from the SNR and the source distance.
    sigma2 = 10 ** (-S / 10) / (4.0 * np.pi * di) ** 2

    # Anechoic room (max_order=0) with additive white noise.
    room_dim = np.r_[r_x, r_y]
    aroom = pra.ShoeBox(
        room_dim,
        fs=f,
        max_order=0,
        sigma2_awgn=sigma2,
        temperature=room_temp,
        humidity=room_humidity,
        air_absorption=is_airAbsorption,
    )

    # White-noise source placed `di` away from the room centre.
    source_location = room_dim / 2 + di * np.r_[np.cos(azimuth), np.sin(azimuth)]
    source_signal = np.random.randn((n // 2 + 1) * n)
    aroom.add_source(source_location, signal=source_signal)

    # Circular array centred in the room.
    R = pra.circular_2D_array(room_dim / 2, num, angle, raduis)
    aroom.add_microphone_array(pra.MicrophoneArray(R, fs=aroom.fs))

    aroom.simulate()

    # One STFT per microphone signal.
    X = np.array([
        pra.transform.stft.analysis(channel, n, n // 2).T
        for channel in aroom.mic_array.signals
    ])

    for algo_name in algos:
        doa = pra.doa.algorithms[algo_name](R, f, n, c=c1)
        doa.locate_sources(X, freq_bins=freq_bins)

        doa.polar_plt_dirac()
        plt.title(algo_name)

        labels = result_labels.get(algo_name)
        if labels is not None:
            azimuth_label, error_label = labels
            getattr(self, azimuth_label).setText(
                '{:.2f}'.format(float(doa.azimuth_recon / np.pi * 180.0)))
            getattr(self, error_label).setText(
                '{:.2f}'.format(
                    float(circ_dist(azimuth, doa.azimuth_recon) / np.pi * 180.0)))

        if save:
            plt.savefig("./{}.png".format(algo_name))

    if not save:
        plt.show()
def get_nodes_centers(self):
    """Place the node centres on the table.

    The nodes are first spread on a circle around the table centre with a
    random start angle (stored in ``self.phi_t``), then each one is shifted
    by a random amount along the directions returned by
    ``self.get_nodes_angles()[-1]`` so that they do not sit on a perfect
    circle. All nodes get the height of the table centre.

    Returns:
        tuple: ``(nodes_centers, 0)`` where ``nodes_centers`` has shape
        ``(self.n_nodes, 3)``.
    """
    centers = np.zeros((self.n_nodes, 3))

    table_center, table_radius = self.get_table_position()

    # Central symmetry makes larger start angles redundant.
    self.phi_t = 2 * np.pi / self.n_nodes * np.random.rand()

    ring = circular_2D_array(table_center[:2],
                             self.n_nodes,
                             self.phi_t,
                             table_radius - self.d_nt)
    centers[:, :2] = ring.T

    # Random shift along the projection directions.
    directions = self.get_nodes_angles()[-1]
    shift_span = self.d_max - self.d_nt_range[0]
    shifts = -self.d_nt + shift_span * np.random.rand(self.n_sources, 1)
    centers[:, :2] += shifts * directions

    # Nodes lie on the table.
    centers[:, -1] = table_center[-1]

    return centers, 0
def add_circular_microphones(self):
    """Compute the microphone coordinates of every node.

    The microphones of a node are placed on a circle of radius ``self.d_mn``
    around the node centre, with a random start angle in ``[0, pi/2)``.
    Their height is the height of the node centre.

    Returns:
        np.ndarray: Microphone coordinates of shape ``(3, n_mics)``, where
        ``n_mics`` is the sum of ``self.sensors_per_node``.
    """
    total_mics = np.sum(self.sensors_per_node)
    # Layout is (x, y, z) x n_mics, one column per microphone.
    all_mics = np.zeros((3, total_mics))

    first = 0
    for i_n in range(self.n_nodes):
        count = self.sensors_per_node[i_n]
        node_center = self.nodes_centers[i_n]
        last = first + count

        # Horizontal coordinates on the circle around the node centre.
        all_mics[:2, first:last] = circular_2D_array(
            center=node_center[:2],
            M=count,
            phi0=np.pi / 2 * np.random.rand(),
            radius=self.d_mn)
        # Height copied from the node centre.
        all_mics[2, first:last] = node_center[-1]

        first = last

    return all_mics
def __init__(self):
    """Set up the MUSIC locator and the ROS publisher/subscriber."""
    n_channels = 8

    # Sample buffer: one row per channel, initially empty.
    self.t_data = np.empty((n_channels, 0))
    self.t_data_len = 512 * 5

    # Signal-processing parameters.
    self.c = 343.  # speed of sound
    self.fs = 16000  # sampling frequency
    self.nfft = 256  # FFT size
    self.freq_range = [300, 3500]
    self.flag = True

    # Circular array of 8 microphones with radius 37.5 mm centred at (5, 5).
    array_center = np.r_[10., 10.] / 2
    self.echo = pra.circular_2D_array(center=array_center,
                                      M=n_channels,
                                      phi0=np.pi,
                                      radius=37.5e-3)
    music_cls = pra.doa.algorithms["MUSIC"]
    self.doa = music_cls(self.echo, self.fs, self.nfft, c=self.c, num_src=1)
    self.spatial_resp = np.empty((360, ))

    # ROS endpoints are created last so every attribute exists before the
    # subscriber callback can be invoked.
    self.pub = rospy.Publisher("/HarkPower", HarkPower, queue_size=1)
    self.sub = rospy.Subscriber("/HarkWave",
                                HarkWave,
                                self._callback,
                                queue_size=1)
def generate_mic_array(mic_radius: float, n_mics: int, pos):
    """Return the 3D coordinates of a horizontal circular microphone array.

    Radius = 50th percentile of men Bitragion breadth
    (https://en.wikipedia.org/wiki/Human_head)

    Args:
        mic_radius: Radius of the circle.
        n_mics: Number of microphones.
        pos: Indexable ``(x, y, z)`` centre of the array.

    Returns:
        Array of shape ``(3, n_mics)``: the 2D circle from
        ``pra.circular_2D_array`` with a third row filled with ``pos[2]``.
    """
    planar = pra.circular_2D_array(center=[pos[0], pos[1]],
                                   M=n_mics,
                                   phi0=0,
                                   radius=mic_radius)
    heights = np.ones((1, n_mics)) * pos[2]
    return np.concatenate((planar, heights), axis=0)
def generate_mic_array(room, mic_radius: float, n_mics: int):
    """Attach a circular microphone array centred at the origin to ``room``.

    Radius = 50th percentile of men Bitragion breadth
    (https://en.wikipedia.org/wiki/Human_head)

    Args:
        room: Room object exposing ``fs`` and ``add_microphone_array``.
        mic_radius: Radius of the circle.
        n_mics: Number of microphones.
    """
    ring = pra.circular_2D_array(center=[0., 0.],
                                 M=n_mics,
                                 phi0=0,
                                 radius=mic_radius)
    mic_array = pra.MicrophoneArray(ring, room.fs)
    room.add_microphone_array(mic_array)
def get_reverbant_speech(self, speech, noise):
    """Pass ``speech`` and ``noise`` through a random square room.

    Both signals are simulated separately in rooms with the same random side
    length and wall absorption, each with its own random microphone position.
    The outputs are cut to the length of the corresponding input.

    Args:
        speech: Dry speech signal.
        noise: Dry noise signal.

    Returns:
        tuple: ``(speech, noise)`` as recorded by the single microphone.
    """
    side = np.random.randint(6, 10, 1)[0]
    speech_mic = np.random.randint(2, 5, 1)[0]
    # Passed to ShoeBox as `absorption`, drawn from [0.01, 0.5).
    absorption = (0.5 - 0.01) * np.random.rand() + 0.01

    def _record(mic_xy, source_xy, dry):
        # Simulate `dry` played at source_xy and picked up at mic_xy.
        room = pra.ShoeBox([side, side],
                           fs=self.sampling_frequency,
                           t0=0.,
                           absorption=absorption,
                           max_order=12)
        mic = pra.circular_2D_array(center=mic_xy, M=1, phi0=0, radius=0.07)
        room.add_microphone_array(pra.MicrophoneArray(mic, room.fs))
        room.add_source(source_xy, signal=dry)
        room.simulate()
        recorded = room.mic_array.signals.T
        return recorded[0:len(dry), 0]

    # speech
    speech = _record([speech_mic, speech_mic], [1, 1], speech)

    # noise (its microphone position is drawn only after the speech pass)
    noise_mic = np.random.randint(2, 5, 1)[0]
    noise = _record([noise_mic, noise_mic],
                    [noise_mic - 0.5, noise_mic - 0.5],
                    noise)

    return speech, noise
def make_filters(n_mics):
    """Build fractional-delay filters for a plane wave hitting a circular array.

    Args:
        n_mics: Number of microphones on the circle (radius 15 cm, centred
            at the origin).

    Returns:
        The result of ``pra.fractional_delay_filter_bank`` for the
        per-microphone delays of a source at an azimuth of 61 degrees.
    """
    speed_of_sound = 343.
    sample_rate = 16000
    source_azimuth = 61. / 180. * np.pi

    mic_positions = pra.circular_2D_array([0, 0], n_mics, 0., 0.15)

    # Unit vector pointing from the source towards the array.
    direction = -np.array([np.cos(source_azimuth), np.sin(source_azimuth)])

    # Delay of each microphone, in fractional samples.
    delays = np.dot(mic_positions.T, direction) / speed_of_sound * sample_rate

    return pra.fractional_delay_filter_bank(delays)
def generate_room_from_conditions(ang, distances, RT, dimensions, typeofroom):
    """Simulate one second of white noise recorded by a 4-microphone array.

    NOTE(review): the body reads the free names ``fs``, ``sigma2`` and
    ``distance`` (not the ``distances`` parameter); ``distances`` and ``RT``
    are unused here. Confirm these come from module scope as intended.

    Args:
        ang: Source azimuth in radians, measured from the room centre.
        distances: Unused in this function.
        RT: Unused in this function.
        dimensions: Mapping/sequence of room dimensions.
        typeofroom: Key/index into ``dimensions``.

    Returns:
        ``aroom.mic_array.signals`` after the simulation.
    """
    room_size = dimensions[typeofroom]

    # Anechoic room (max_order=0) with additive white noise.
    aroom = pra.ShoeBox(room_size, fs=fs, max_order=0, sigma2_awgn=sigma2)

    # Four microphones on a small circle around the room centre.
    ring = pra.circular_2D_array(center=room_size * 0.5,
                                 M=4,
                                 phi0=0,
                                 radius=0.0047746483)
    aroom.add_microphone_array(pra.MicrophoneArray(ring, aroom.fs))

    # One second of white noise from a fixed-seed generator.
    rng = np.random.RandomState(23)
    n_samples = int(fs)
    direction = np.r_[np.cos(ang), np.sin(ang)]
    source_xy = room_size / 2 + distance * direction
    white_noise = rng.randn(n_samples)
    aroom.add_source(source_xy, signal=white_noise)

    aroom.simulate()

    return aroom.mic_array.signals
def get_source_positions(self):
    """Place the sources on a circle around the table.

    The horizontal positions lie on a circle of radius
    ``self.table_radius + self.d_st`` around the table centre, starting at
    angle ``self.phi_t``. Heights are drawn uniformly from ``self.z_range_s``.

    Returns:
        - sources_positions (np.ndarray): positions, one ``(x, y, z)`` row
          per position, ``self.n_nodes`` rows in total
        - n_trials (int): 0 if all positions respect the wall margin
          ``self.d_sw``, 100 otherwise
    """
    positions = np.zeros((self.n_nodes, 3))
    ring = circular_2D_array(self.table_center[:2],
                             self.n_nodes,
                             self.phi_t,
                             self.table_radius + self.d_st)
    positions[:, :2] = ring.T

    # One independent random height per source.
    for i_s in range(self.n_sources):
        z_low, z_high = self.z_range_s[0], self.z_range_s[1]
        positions[i_s, -1] = z_low + (z_high - z_low) * np.random.rand()

    # Reject the layout if any source is too close to a wall.
    violates_margin = (
        np.any(positions[:, :2] <= self.d_sw)
        or np.any(positions[:, 0] >= self.length - self.d_sw)
        or np.any(positions[:, 1] >= self.width - self.d_sw)
    )
    n_trials = 100 if violates_margin else 0

    return positions.copy(), n_trials
# Read a multi-channel WAV file given as the first command-line argument and
# run MUSIC direction-of-arrival estimation on it.
fs, raw_data = scipy.io.wavfile.read(sys.argv[1])
print(f"Detected sampling frequency: {fs}Hz")
print(f"Found {raw_data.shape[1]} channels.")

# One row per channel; work on a copy so the raw data stays untouched.
raw_channels = raw_data.T
channels = np.copy(raw_channels)
# Disabled channel re-ordering experiments:
# channels[1] = raw_channels[2]
# channels[2] = raw_channels[1]
# channels = raw_data.T[::-1]

c = 13503.94  # speed of sound in inches/second
nfft = 256  # FFT size
freq_range = [300, 3500]

# Three microphones on a circle of radius 8.66 (same length unit as `c`,
# i.e. inches), first microphone at -30 degrees.
mic_positions = pra.circular_2D_array(center=(0, 0), M=3, phi0=-math.pi / 6, radius=8.66)
print(mic_positions)

# One STFT per channel, stacked along the first axis.
X = np.array([
    pra.stft(channel, nfft, nfft // 2, transform=np.fft.rfft).T
    for channel in channels
])

# Locate a single source with MUSIC inside `freq_range`.
doa = pra.doa.algorithms["MUSIC"](mic_positions, fs, nfft, c=c, num_src=1, max_four=4)
doa.locate_sources(X, freq_range=freq_range)
pos_noise = [2.8, 4.3] #the number of mic you want to place in the room number_mics = 6 # creation of the mic_array in a special way such that we can use beamforming # shape of the array shape = 'Circular' #position of the center mic mic = np.array([2, 1.5]) # radius of the array d = 5 # the angle from horizontal phi = 0. # creation of the array if shape is 'Circular': R = pra.circular_2D_array(mic, number_mics, phi, radius=d) else: R = pra.linear_2D_array(mic, number_mics, phi, d) R = np.concatenate((R, np.array(mic, ndmin=2).T), axis=1) # FFT length N = 1024 # desired basis word(s) (can also be a list) desired_word = 'yes' #choose your label file labels_file = "conv_labels.txt" # choose your graph file graph_file = "my_frozen_graph.pb" # destination directory to write your new samples dest_dir = 'output_final_beamforming' if not os.path.exists(dest_dir):
import numpy as np import pyroomacoustics as pra room = pra.ShoeBox([4, 6], fs=16000, max_order=1) # add sources in the room room.add_source([2, 1.5]) # nice source room.add_source([2, 4.5]) # interferer # add a circular beamforming array shape = pra.circular_2D_array([2.5, 3], 8, 0.0, 0.15) bf = pra.Beamformer(shape, room.fs, Lg=500) room.add_microphone_array(bf) # run the ISM room.image_source_model() # the noise matrix, note that the size is the number of # sensors multiplied by the filter size Rn = np.eye(bf.M * bf.Lg) * 1e-5 def test_rake_max_udr_filters(): # no interferer bf.rake_max_udr_filters(room.sources[0][:4], R_n=Rn, delay=0.015, epsilon=1e-2) # with interferer bf.rake_max_udr_filters( room.sources[0][:4],
def srp_phat(s, fs, nFFT=None, center=None, d=None, azimuth_estm=None, mode=None):
    '''
    Applies Steered Power Response with phase transform algorithm

    Uses pyroomacoustics module

    Input params
    ------------
    s: numpy array
        -stacked microphone array signals of size MxN, where M is the number
         of microphones (taken from ``s.shape[0]``) and N is the length of
         the audio signal.
    fs: int
        -Sampling frequency
    nFFT: int
        -FFT size. Default 1024
    center: array-like
        -Center of the microphone array. Default [0, 0]
    d: float
        -Distance between microphones. Default 0.1 (10 cm).
    azimuth_estm: sequence
        -Candidate azimuth estimates. Only its length is used here, as the
         number of sources to look for. Default [60, 120] (two speakers).
    mode: str
        -Microphone layout. ``'circular'`` builds a circular array whose
         radius is ``d * M / (2 * pi)``; ``None`` or any other value
         (including ``'linear'``) builds a linear array.

    The polar plot is shown with ``plt.show()`` and the sorted recovered
    azimuths are printed in degrees. Nothing is returned.
    '''
    if nFFT is None:
        nFFT = 1024
    if center is None:
        center = [0, 0]
    if d is None:
        d = 0.1
    if azimuth_estm is None:
        azimuth_estm = [60, 120]

    freq_bins = np.arange(30, 330)  # individual frequency bins used for DoA
    M = s.shape[0]  # number of microphones
    phi = 0  # array orientation
    c = 343.0  # speed of sound

    # Microphone array layout. Strings are compared by value; the previous
    # `is` / `or 'linear'` tests made the linear layout always win.
    if mode == 'circular':
        radius = d * M / (2 * np.pi)  # neighbouring mics are ~d apart
        L = pra.circular_2D_array(center, M, phi, radius)
    else:
        L = pra.linear_2D_array(center, M, phi, d)

    nSrc = len(azimuth_estm)  # number of speakers

    # STFT of every channel
    s_FFT = np.array([
        pra.stft(channel, nFFT, nFFT // 2, transform=np.fft.rfft).T
        for channel in s
    ])

    # SRP-PHAT localisation
    doa = pra.doa.srp.SRP(L, fs, nFFT, c, max_four=4, num_src=nSrc)
    doa.locate_sources(s_FFT, freq_bins=freq_bins)

    # Plotting and reporting
    doa.polar_plt_dirac()
    plt.title('SRP-PHAT')
    print('SRP-PHAT')
    print('Speakers at: ', np.sort(doa.azimuth_recon) / np.pi * 180, 'degrees')
    plt.show()
def plot_room_setup(filename, n_mics, n_targets, parameters):
    """Plot the 2D room scenario and save the figure to ``filename``.

    Args:
        filename: Destination passed to ``plt.savefig``.
        n_mics: Number of microphones in the circular array.
        n_targets: Number of target sources.
        parameters: Mapping with the keys ``n_interferers``, ``n_blinkies``,
            ``ref_mic``, ``room_dim`` and ``blinky_geometry``.
    """
    n_interferers = parameters['n_interferers']
    n_blinkies = parameters['n_blinkies']
    ref_mic = parameters['ref_mic']
    room_dim = np.array(parameters['room_dim'])

    # total number of sources
    n_sources = n_interferers + n_targets

    # Source locations: random interferers, targets on a 120 degrees arc
    # 2 meters away from the array.
    interferer_locs = random_layout([3., 5.5, 1.5],
                                    n_interferers,
                                    offset=[6.5, 1., 0.5],
                                    seed=1)
    target_locs = semi_circle_layout([4.1, 3.755, 1.2],
                                     np.pi / 1.5,
                                     2.,
                                     n_targets,
                                     rot=0.743 * np.pi)
    source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

    geometry = parameters['blinky_geometry']
    if geometry == 'gm':
        # Normally distributed in the vicinity of each source
        blinky_locs = gm_layout(n_blinkies,
                                target_locs - np.c_[[0., 0., 0.5]],
                                std=[0.4, 0.4, 0.05],
                                seed=987)
    elif geometry == 'grid':
        # Placed on a regular grid, with a little bit of noise added
        blinky_locs = grid_layout([3., 5.5],
                                  n_blinkies,
                                  offset=[1., 1., 0.7],
                                  seed=987)
    else:
        # default is semi-circular
        blinky_locs = semi_circle_layout([4.1, 3.755, 1.1],
                                         np.pi,
                                         3.5,
                                         n_blinkies,
                                         rot=0.743 * np.pi - np.pi / 4,
                                         seed=987)

    # Circular array of radius 2 cm at a height of 1.2.
    mic_ring = pra.circular_2D_array([4.1, 3.76], n_mics, np.pi / 2, 0.02)
    mic_heights = 1.2 * np.ones((1, n_mics))
    mic_locs = np.vstack((mic_ring, mic_heights))

    all_locs = np.concatenate((mic_locs, blinky_locs), axis=1)

    # The plot is 2D: only the first two coordinates are used from here on.
    room = pra.ShoeBox(room_dim[:2])
    for source_xy in source_locs[:2].T:
        room.add_source(source_xy)

    room.add_microphone_array(pra.MicrophoneArray(all_locs[:2, :], fs=room.fs))

    room.plot(img_order=0)
    margin = 0.1
    plt.xlim([-margin, room_dim[0] + margin])
    plt.ylim([-margin, room_dim[1] + margin])
    plt.savefig(filename)
max_order = 3
room_dim = [4, 6]
snr_vals = np.arange(100, -10, -10)
desired_word = 'yes'
pos_source = [1, 4.5]
pos_noise = [2.8, 4.3]
number_mics = 3
mic = np.array([2, 1.5])  # position
d = 0.08  # distance between microphones
phi = 0.  # angle from horizontal
shape = 'Circular'  # array shape
N = 1024  # FFT length

# Create the microphone array.
# Compare by value: `is` tests object identity and only works for string
# literals by accident of interning.
if shape == 'Circular':
    # Radius chosen so that neighbouring microphones are about `d` apart.
    R = pra.circular_2D_array(mic, number_mics, phi, d * number_mics / (2 * np.pi))
else:
    R = pra.linear_2D_array(mic, number_mics, phi, d)
# Append the centre position as one more microphone (extra column).
R = np.concatenate((R, np.array(mic, ndmin=2).T), axis=1)

# create object
dataset = pra.datasets.GoogleSpeechCommands(download=True, subset=1)
print(dataset)

# separate the noise and the speech samples
noise_samps = dataset.filter(speech=0)
speech_samps = dataset.filter(speech=1)
speech_samps = speech_samps.filter(word=desired_word)

# pick one of each from WAV
speech = speech_samps[0]
# We define a meaningful distance measure on the circle # Location of original source azimuth = 61. / 180. * np.pi # 60 degrees distance = 3. # 3 meters ####################### # algorithms parameters SNR = 0. # signal-to-noise ratio c = 343. # speed of sound fs = 44100 # sampling frequency nfft = 256 # FFT size freq_bins = np.arange(5, 60) # FFT bins to use for estimation # We use a circular array with radius 15 cm # and 12 microphones R = pra.circular_2D_array((5, 5), 6, 0., 0.0463) source = np.array([2, 5]) doatxt = "" for blocks in range(0, data.shape[0] // ti): #for blocks in range(0,10): signals = [] for i in range(0, data.shape[1]): signals.append(data[blocks * ti:(blocks + 1) * ti, i]) #print(len(signals[0])) X = np.array([ pra.stft(signal, nfft, nfft // 2, transform=np.fft.rfft).T for signal in signals ]) #X shape should be (6,129,24)
def doa(source_signal):
    """Estimate the azimuth of one source with MUSIC.

    The microphones are assumed to form a circular array of 8 elements with
    a radius of 15 cm centred at (5, 5).

    Args:
        source_signal: Iterable of per-microphone time signals, one entry
            per microphone of the array.

    Returns:
        The first recovered azimuth, in degrees.
    """
    # algorithms parameters
    c = 343.  # speed of sound
    fs = 16000  # sampling frequency
    nfft = 256  # FFT size
    freq_bins = np.arange(5, 60)  # FFT bins to use for estimation

    # Array geometry; the centre matches the middle of a 10 m x 10 m area.
    room_dim = np.r_[10., 10.]
    R = pra.circular_2D_array(room_dim / 2, 8, 0., 0.15)

    # One STFT per microphone signal, stacked along the first axis.
    X = np.array([
        pra.stft(channel, nfft, nfft // 2, transform=np.fft.rfft).T
        for channel in source_signal
    ])

    # Named `locator` so the local does not shadow this function.
    # max_four is only meaningful for FRIDA but is accepted by the others.
    locator = pra.doa.algorithms["MUSIC"](R, fs, nfft, c=c, max_four=4)
    locator.locate_sources(X, freq_bins=freq_bins)

    return locator.azimuth_recon[0] / np.pi * 180.
# fix the RNG seed for repeatability np.random.seed(0) # Location of original source azimuth = 61.0 / 180.0 * np.pi # 60 degrees tol = 0.3 / 180.0 * np.pi # 0.3 degrees tolerance for the test # algorithms parameters c = 343.0 fs = 16000 nfft = 256 freq_bins = np.arange(5, 60) # circular microphone array, 12 mics, radius 15 cm R = pra.circular_2D_array([0, 0], 12, 0.0, 0.15) # propagation filter bank propagation_vector = -np.array([np.cos(azimuth), np.sin(azimuth)]) delays = np.dot(R.T, propagation_vector) / c * fs # in fractional samples filter_bank = pra.fractional_delay_filter_bank(delays) # we use a white noise signal for the source x = np.random.randn((nfft // 2 + 1) * nfft) # convolve the source signal with the fractional delay filters # to get the microphone input signals mic_signals = np.array( [fftconvolve(x, filter, mode="same") for filter in filter_bank]) X = pra.transform.stft.analysis(mic_signals.T, nfft,
result = pd.DataFrame(columns=['Alpha', 'ANCRed', 'ANCRedBeam']) # # Create Room, Sensors and calculate beam patterns Lg_t = 0.100 # filter size in seconds Lg = np.ceil(Lg_t * Fs) # filter size in samples alphas = np.arange(0.1, 1, 0.05) source = np.array([1, 4.5]) interferer = np.array([3.5, 3.]) radius = 0.15 roomDim = [8, 6] center = [1, 3.5] fft_len = 512 echo = pra.circular_2D_array(center=center, M=6, phi0=0, radius=radius) echo = np.concatenate((echo, np.array(center, ndmin=2).T), axis=1) for alpha in alphas: room_bf = pra.ShoeBox(roomDim, fs=Fs, max_order=64, absorption=alpha) mics = pra.Beamformer(echo, room_bf.fs, N=fft_len, Lg=Lg) room_bf.add_microphone_array(mics) room_bf.add_source(source, delay=0., signal=xtone) room_bf.add_source(interferer, delay=0, signal=silence) # Compute DAS weights mics.rake_delay_and_sum_weights(room_bf.sources[0][:1]) # # Do Beamforming room_bf.image_source_model(use_libroom=True)
# STFT analysis window and the matching synthesis window for a hop of half a
# frame. `framesize`, `n_mics`, `n_sources` and `n_sources_target` are
# defined outside this fragment.
win_a = pra.hann(framesize)
win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)

# algorithm parameters
n_iter = 51
n_nmf_sub_iter = 20
sparse_reg = 0.0

# pre-emphasis of blinky signals
pre_emphasis = False

# Geometry of the room and location of sources and microphones
room_dim = np.array([10, 7.5, 3])

# Circular array of radius 0.02 lifted to a height of 1.2: the 2D circle is
# stacked with a constant third row.
mic_locs = np.vstack((
    pra.circular_2D_array([4.1, 3.76], n_mics, np.pi / 2, 0.02),
    1.2 * np.ones((1, n_mics)),
))

# Targets come from semi_circle_layout, interferers from random_layout
# (fixed seed); both are project helpers defined elsewhere.
target_locs = semi_circle_layout([4.1, 3.755, 1.1], np.pi / 2, 2.0, n_sources_target, rot=0.743 * np.pi)
# interferer_locs = grid_layout([3., 5.5], n_sources - n_sources_target, offset=[6.5, 1., 1.7])
interferer_locs = random_layout([3.0, 5.5, 1.5], n_sources - n_sources_target, offset=[6.5, 1.0, 0.5], seed=1)
# Targets first, then interferers, one column per source.
source_locs = np.concatenate((target_locs, interferer_locs), axis=1)
nfft = 512 # approximately 30 overlapping frames / second vad_thresh = -30 # dB r, target_audio = wavfile.read('../audio/DR1_FAKS0_SX133_SX313_SX223.wav') assert r == fs_sound target_audio = np.concatenate([np.zeros(fs_sound), target_audio.astype(np.float32)]) target_audio /= target_audio.max() sigma_s = np.std(target_audio) sound_time = np.arange(target_audio.shape[0]) / fs_sound mics_loc = np.c_[ [1, 1.9825], # mic 1 [1, 2.0175], # mic 2 ] # location of microphone array mics_loc = pra.circular_2D_array([2., 2.], 6, 0, 0.0625) # kindof compactsix src_loc = np.r_[5, 2] # source location noise_loc = np.r_[2.5, 4.5] SIR = 25 # decibels SINR = SIR - 1 # decibels sigma_i, sigma_n = compute_variances(SIR, SINR, src_loc, noise_loc, mics_loc.mean(axis=1), sigma_s=sigma_s) interference_audio = np.random.randn(target_audio.shape[0] + fs_sound) * sigma_i room = pra.ShoeBox([6,5], fs=16000, max_order=12, absorption=0.4, sigma2_awgn=sigma_n**2) room.add_source(src_loc, signal=target_audio) room.add_source(noise_loc, signal=interference_audio) # conventional microphone array
def one_loop(args):
    """Run one simulated separation experiment and score every algorithm.

    All imports are done inside the function; project modules are found by
    appending ``parameters['base_dir']`` to ``sys.path``. ``parameters`` is
    read from the enclosing (module) scope.

    Args:
        args: Tuple ``(n_targets, n_mics, rt60, sinr, wav_files, seed)``.
            ``rt60`` is a key into ``parameters['rt60_list']``; ``seed``
            seeds NumPy's global RNG for the duration of the call.

    Returns:
        list: One dict per entry of ``parameters['algorithm_kwargs']`` with
        the keys ``algorithm``, ``n_targets``, ``n_mics``, ``rt60``,
        ``sinr``, ``seed``, ``sdr`` and ``sir``. An empty list is returned
        for the underdetermined case ``n_mics < n_targets``.
    """
    import numpy
    np = numpy

    import pyroomacoustics
    pra = pyroomacoustics

    import sys
    sys.path.append(parameters['base_dir'])

    from routines import semi_circle_layout, random_layout, gm_layout, grid_layout
    from blinkiva import blinkiva
    from blinkiva_gauss import blinkiva_gauss
    from generate_samples import wav_read_center

    n_targets, n_mics, rt60, sinr, wav_files, seed = args

    # this is the underdetermined case. We don't do that.
    if n_mics < n_targets:
        return []

    # set MKL to only use one thread if present
    try:
        import mkl
        mkl.set_num_threads(1)
    except ImportError:
        pass

    # set the RNG seed (the previous state is restored before returning)
    rng_state = np.random.get_state()
    np.random.seed(seed)

    # STFT parameters
    framesize = parameters['stft_params']['framesize']
    win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)

    # Generate the audio signals

    # get the simulation parameters from the json file
    # Simulation parameters
    n_repeat = parameters['n_repeat']
    fs = parameters['fs']
    snr = parameters['snr']

    n_interferers = parameters['n_interferers']
    n_blinkies = parameters['n_blinkies']
    ref_mic = parameters['ref_mic']
    room_dim = np.array(parameters['room_dim'])

    # Target variances: all ones except the first ("weak") source.
    sources_var = np.ones(n_targets)
    sources_var[0] = parameters['weak_source_var']

    # total number of sources
    n_sources = n_interferers + n_targets

    # Geometry of the room and location of sources and microphones
    interferer_locs = random_layout([3., 5.5, 1.5], n_interferers, offset=[6.5, 1., 0.5], seed=1)

    target_locs = semi_circle_layout(
        [4.1, 3.755, 1.2],
        np.pi / 1.5, 2.,  # 120 degrees arc, 2 meters away
        n_targets,
        rot=0.743 * np.pi,
    )

    source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

    if parameters['blinky_geometry'] == 'gm':
        # Normally distributed in the vicinity of each source
        blinky_locs = gm_layout(
            n_blinkies, target_locs - np.c_[[0., 0., 0.5]],
            std=[0.4, 0.4, 0.05], seed=987,
        )

    elif parameters['blinky_geometry'] == 'grid':
        # Placed on a regular grid, with a little bit of noise added
        blinky_locs = grid_layout([3.,5.5], n_blinkies, offset=[1., 1., 0.7], seed=987,)

    else:
        # default is semi-circular
        blinky_locs = semi_circle_layout(
            [4.1, 3.755, 1.1],
            np.pi, 3.5,
            n_blinkies,
            rot=0.743 * np.pi - np.pi / 4,
            seed=987,
        )

    # Circular array (radius 2 cm) at a height of 1.2, followed by the
    # blinkies: the first n_mics columns of all_locs are the microphones.
    mic_locs = np.vstack((
        pra.circular_2D_array([4.1, 3.76], n_mics, np.pi / 2, 0.02),
        1.2 * np.ones((1, n_mics)),
    ))

    all_locs = np.concatenate((mic_locs, blinky_locs), axis=1)

    signals = wav_read_center(wav_files, seed=123)

    # Create the room itself
    room = pra.ShoeBox(room_dim,
                       fs=fs,
                       absorption=parameters['rt60_list'][rt60]['absorption'],
                       max_order=parameters['rt60_list'][rt60]['max_order'],
                       )

    # Place all the sound sources (the last n_sources signals are used)
    for sig, loc in zip(signals[-n_sources:,:], source_locs.T,):
        room.add_source(loc, signal=sig)

    assert len(room.sources) == n_sources, \
        "Number of signals ({}) doesn't match number of sources ({})".format(
            signals.shape[0], n_sources)

    # Place the microphone array
    room.add_microphone_array(
        pra.MicrophoneArray(all_locs, fs=room.fs)
    )

    # compute RIRs
    room.compute_rir()

    # Run the simulation
    premix = room.simulate(return_premix=True)

    # Normalize the signals so that they all have unit
    # variance at the reference microphone
    p_mic_ref = np.std(premix[:,ref_mic,:], axis=1)
    premix /= p_mic_ref[:,None,None]

    # scale to pre-defined variance
    premix[:n_targets,:,:] *= np.sqrt(sources_var[:,None,None])

    # compute noise variance
    sigma_n = np.sqrt(10 ** (- snr / 10) * np.sum(sources_var))

    # now compute the power of interference signal needed to achieve desired SINR
    sigma_i = np.sqrt(
        np.maximum(0, 10 ** (- sinr / 10) * np.sum(sources_var) - sigma_n ** 2) / n_interferers
    )
    premix[n_targets:,:,:] *= sigma_i

    # Mix down the recorded signals
    mix = np.sum(premix, axis=0) + sigma_n * np.random.randn(*premix.shape[1:])

    # Per-source reference signals with the last two axes of premix swapped.
    ref = np.moveaxis(premix, 1, 2)

    # START BSS
    ###########

    # pre-emphasis on blinky signals (first-order difference, last sample zeroed)
    if parameters['use_pre_emphasis']:
        mix[n_mics:,:-1] = np.diff(mix[n_mics:,:], axis=1)
        mix[n_mics:,-1] = 0.

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(
        mix.T,
        framesize, framesize // 2,
        win=win_a,
    )
    X_mics = X_all[:,:,:n_mics]
    U_blinky = np.sum(np.abs(X_all[:,:,n_mics:]) ** 2, axis=1)  # shape: (n_frames, n_blinkies)

    # convergence monitoring callback
    def convergence_callback(Y, n_targets, SDR, SIR, ref, framesize, win_s, algo_name):
        # Resynthesise Y, score it against `ref` and append SDR/SIR in place.
        from mir_eval.separation import bss_eval_sources

        y = pra.transform.synthesis(Y, framesize, framesize // 2, win=win_s,)

        # Algorithms other than blinkiva* get their outputs sorted by
        # decreasing standard deviation before evaluation.
        if not algo_name.startswith('blinkiva'):
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:,new_ord]

        # Compare over the common length, skipping the first half frame of y.
        m = np.minimum(y.shape[0]-framesize//2, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_targets,:m,0],
            y[framesize//2:m+framesize//2,:n_targets].T,
        )
        SDR.append(sdr.tolist())
        SIR.append(sir.tolist())

    # store results in a list, one entry per algorithm
    results = []

    for name, kwargs in parameters['algorithm_kwargs'].items():

        results.append({
            'algorithm' : name,
            'n_targets' : n_targets,
            'n_mics' : n_mics,
            'rt60' : rt60,
            'sinr' : sinr,
            'seed' : seed,
            'sdr' : [], 'sir' : [],  # to store the result
        })

        if parameters['monitor_convergence']:
            cb = lambda Y : convergence_callback(
                Y, n_targets,
                results[-1]['sdr'], results[-1]['sir'],
                ref, framesize, win_s, name,
            )
        else:
            cb = None
            # In that case, we still want to capture the initial values of SDR/SIR
            convergence_callback(
                X_mics, n_targets,
                results[-1]['sdr'], results[-1]['sir'],
                ref, framesize, win_s, name,
            )

        if name == 'auxiva':
            # Run AuxIVA
            Y = pra.bss.auxiva(
                X_mics,
                callback=cb,
                **kwargs,
            )

        elif name == 'blinkiva':
            # Run BlinkIVA
            Y = blinkiva(
                X_mics, U_blinky, n_src=n_targets,
                callback=cb,
                **kwargs,
            )

        elif name == 'blinkiva-gauss':
            # Run BlinkIVA (Gaussian variant)
            Y = blinkiva_gauss(
                X_mics, U_blinky, n_src=n_targets,
                callback=cb,
                **kwargs,
            )

        else:
            # Unknown algorithm: its result entry keeps whatever was recorded so far.
            continue

        # The last evaluation
        convergence_callback(
            Y, n_targets,
            results[-1]['sdr'], results[-1]['sir'],
            ref, framesize, win_s, name,
        )

    # restore RNG former state
    np.random.set_state(rng_state)

    return results
# compute the noise variance sigma2 = 10**(-SNR / 10) / (4. * np.pi * distance)**2 # Create an anechoic room room_dim = np.r_[10., 10.] aroom = pra.ShoeBox(room_dim, fs=fs, max_order=0, sigma2_awgn=sigma2) # add the source source_location = room_dim / 2 + distance * np.r_[np.cos(azimuth), np.sin(azimuth)] source_signal = np.random.randn((nfft // 2 + 1) * nfft) aroom.add_source(source_location, signal=source_signal) # We use a circular array with radius 15 cm # and 12 microphones R = pra.circular_2D_array(room_dim / 2, 12, 0., 0.15) aroom.add_microphone_array(pra.MicrophoneArray(R, fs=aroom.fs)) # run the simulation aroom.simulate() ################################ # Compute the STFT frames needed X = np.array([ pra.stft(signal, nfft, nfft // 2, transform=np.fft.rfft).T for signal in aroom.mic_array.signals ]) ############################################## # Now we can test all the algorithms available algo_names = sorted(pra.doa.algorithms.keys())
# Simulate `audio` in a reverberant room recorded by a two-microphone array.
# `room_dim`, `distance`, `azimuth`, `fs`, `m`, `sigma2`, `audio` and
# `split_audio` are defined outside this fragment.
source_location = room_dim / 2 + distance * \
    np.r_[np.cos(azimuth), np.sin(azimuth)]
print(f'Source location: {source_location}')

aroom = pra.ShoeBox(room_dim, fs=fs, max_order=17, materials=m, sigma2_awgn=sigma2)
# sigma2_awgn is the variance of white gaussian noise

# add the source
# NOTE(review): source_location is recomputed with the same expression as
# above; the second assignment is redundant.
source_location = room_dim / 2 + distance * \
    np.r_[np.cos(azimuth), np.sin(azimuth)]
# source_signal = np.random.randn((nfft // 2 + 1) * nfft)
aroom.add_source(source_location, signal=audio)

# We use a circular array with radius 15 cm
# and 2 microphones
print(f'Room center {room_dim/2}')
R = pra.circular_2D_array(room_dim / 2, 2, 0.0, 0.15)
print(f'R:\n{R}')
aroom.add_microphone_array(pra.MicrophoneArray(R, fs=aroom.fs))

# run the simulation
aroom.simulate()
# print(aroom.mic_array.signals.shape)

# Two rows of half the audio length, filled by the project helper
# `split_audio`.
real_signals = np.zeros((2, len(audio) // 2))
print(f'Real signals: {real_signals.shape}')
real_signals[0], real_signals[1] = split_audio(audio)
# for signal in aroom.mic_array.signals:
#     print(signal.shape)
#     print('______')

################################
# Compute the STFT frames needed
def random_room_builder(
    source_signals: List[np.ndarray],
    n_mics: int,
    mic_delta: Optional[float] = None,
    fs: float = 16000,
    t60_interval: Tuple[float, float] = (0.150, 0.500),
    room_width_interval: Tuple[float, float] = (6, 10),
    room_height_interval: Tuple[float, float] = (2.8, 4.5),
    source_zone_height: Tuple[float, float] = (1.0, 2.0),
    guard_zone_width: float = 0.5,
    seed: Optional[int] = None,
):
    """
    This function creates a random room within some parameters.

    The microphone array is circular. Unless ``mic_delta`` is given, the
    distance between neighboring elements is set to the maximal distance
    avoiding spatial aliasing (half a wavelength at ``fs / 2``).

    The first source (the target) is placed between one critical distance
    and one critical distance plus one metre from the array centre; all
    other sources (interferers) are placed at least one critical distance
    plus one metre away.

    Parameters
    ----------
    source_signals: list of numpy.ndarray
        A list of audio signals for each source (at least one)
    n_mics: int
        The number of microphones in the microphone array
    mic_delta: float, optional
        The distance between neighboring microphones in the array
    fs: float, optional
        The sampling frequency for the simulation
    t60_interval: (float, float), optional
        An interval where to pick the reverberation time
    room_width_interval: (float, float), optional
        An interval where to pick the room horizontal length/width
    room_height_interval: (float, float), optional
        An interval where to pick the room vertical length
    source_zone_height: (float, float), optional
        The vertical interval where sources and microphones are allowed
    guard_zone_width: float
        The minimum distance between a vertical wall and a source/microphone
    seed: int, optional
        When given, the global numpy RNG is seeded with this value for the
        duration of the call and its previous state is restored afterwards,
        even if the construction fails

    Returns
    -------
    ShoeBox object
        A randomly generated room according to the provided parameters
    float
        The T60 reverberation time measured on the impulse response
        ``room.rir[0][0]`` of the room created

    Raises
    ------
    AssertionError
        If one of the sanity checks on the zone/interval parameters fails
    ValueError
        If ``source_signals`` is empty
    """

    n_sources = len(source_signals)

    # Sanity checks. They run before the RNG is touched so that a rejected
    # call leaves the caller's random state unchanged.
    assert source_zone_height[0] > 0
    assert source_zone_height[1] < room_height_interval[0]
    assert source_zone_height[0] <= source_zone_height[1]
    # NOTE(review): this compares the guard zone against the *span* of the
    # width interval rather than its lower bound -- confirm this is intended.
    assert 2 * guard_zone_width < room_width_interval[1] - room_width_interval[0]

    # Fail early: without this, an empty list only fails much later when
    # drawing a negative number of interferer locations.
    if n_sources < 1:
        raise ValueError("source_signals must contain at least one signal")

    def random_location(
        room_dim, n, ref_point=None, min_distance=None, max_distance=None
    ):
        """
        Draw ``n`` random locations (one per column of a ``(3, n)`` array)
        inside the allowed zone of the room.

        When ``ref_point`` is given, locations whose distance to it violates
        ``min_distance`` / ``max_distance`` are redrawn recursively until all
        of them comply; hard-to-satisfy constraints can recurse deeply.
        """
        width = room_dim[0] - 2 * guard_zone_width
        width_intercept = guard_zone_width

        depth = room_dim[1] - 2 * guard_zone_width
        depth_intercept = guard_zone_width

        height = np.diff(source_zone_height)[0]
        height_intercept = source_zone_height[0]

        # uniform draws in [0, 1) rescaled to the allowed box
        locs = rand(3, n)
        locs[0, :] = locs[0, :] * width + width_intercept
        locs[1, :] = locs[1, :] * depth + depth_intercept
        locs[2, :] = locs[2, :] * height + height_intercept

        if ref_point is not None:
            # distance of every candidate to the reference point
            d = np.linalg.norm(locs - ref_point, axis=0)

            if min_distance is not None and max_distance is not None:
                redo = np.where(np.logical_or(d < min_distance, max_distance < d))[0]
            elif min_distance is not None:
                redo = np.where(d < min_distance)[0]
            elif max_distance is not None:
                redo = np.where(d > max_distance)[0]
            else:
                redo = []

            # Redraw only the offending columns
            if len(redo) > 0:
                locs[:, redo] = random_location(
                    room_dim,
                    len(redo),
                    ref_point=ref_point,
                    min_distance=min_distance,
                    max_distance=max_distance,
                )

        return locs

    # save current numpy RNG state and set a known seed
    if seed is not None:
        rng_state = np.random.get_state()
        np.random.seed(seed)

    try:
        c = pra.constants.get("c")

        # Create the room
        # Sometimes the room dimension and required T60 are not compatible,
        # then we just try again with new random values
        while True:
            try:
                room_dim = np.array(
                    [
                        rand() * np.diff(room_width_interval)[0]
                        + room_width_interval[0],
                        rand() * np.diff(room_width_interval)[0]
                        + room_width_interval[0],
                        rand() * np.diff(room_height_interval)[0]
                        + room_height_interval[0],
                    ]
                )
                t60 = rand() * np.diff(t60_interval)[0] + t60_interval[0]
                reflection, max_order = inv_sabine(t60, room_dim, c)
            except ValueError:
                continue
            break

        # Create the room based on the random parameters
        room = pra.ShoeBox(
            room_dim, fs=fs, absorption=1 - reflection, max_order=max_order
        )

        # The critical distance
        # https://en.wikipedia.org/wiki/Critical_distance
        d_critical = 0.057 * np.sqrt(np.prod(room_dim) / t60)

        # default intermic distance is set according to nyquist criterion
        # i.e. 1/2 wavelength corresponding to fs / 2 at given speed of sound
        if mic_delta is None:
            mic_delta = 0.5 * (c / (0.5 * fs))

        # the microphone array is uniformly circular with the distance
        # between neighboring elements of mic_delta: the chord between two
        # neighbors on a circle of radius r is 2 * r * sin(pi / n_mics)
        mic_center = random_location(room_dim, 1)
        mic_rotation = rand() * 2 * np.pi
        mic_radius = 0.5 * mic_delta / np.sin(np.pi / n_mics)
        mic_array = pra.MicrophoneArray(
            np.vstack(
                (
                    pra.circular_2D_array(
                        mic_center[:2, 0], n_mics, mic_rotation, mic_radius
                    ),
                    # all microphones share the height of the array centre
                    mic_center[2, 0] * np.ones(n_mics),
                )
            ),
            room.fs,
        )
        room.add_microphone_array(mic_array)

        # Now we will get the sources at random
        # Choose the target location at least as far as the critical distance
        # Then the other sources, yet one further meter away
        target_source = random_location(
            room_dim,
            1,
            ref_point=mic_center,
            min_distance=d_critical,
            max_distance=d_critical + 1,
        )
        interferers = random_location(
            room_dim,
            n_sources - 1,
            ref_point=mic_center,
            min_distance=d_critical + 1,
        )
        source_locs = np.concatenate((target_source, interferers), axis=1)

        for s, signal in enumerate(source_signals):
            room.add_source(source_locs[:, s], signal=signal)

        # pre-compute the impulse responses
        room.compute_rir()

        t60_actual = pra.experimental.measure_rt60(room.rir[0][0], room.fs)

        return room, t60_actual
    finally:
        # restore numpy RNG former state, also when the construction failed
        if seed is not None:
            np.random.set_state(rng_state)
# Microphone array and beamformer design parameters
mic1 = np.array([2, 1.5])  # position
M = 8  # number of microphones
d = 0.08  # distance between microphones
phi = 0.0  # angle from horizontal
max_order_design = 1  # maximum image generation used in design
shape = "Linear"  # array shape
Lg_t = 0.100  # Filter size in seconds
Lg = np.ceil(Lg_t * Fs)  # Filter size in samples
delay = 0.050  # Beamformer delay in seconds

# Define the FFT length
N = 1024

# Create a microphone array.
# The shape is compared by value (==): an identity test (`is`) against a
# string literal depends on interning and is not a reliable equality check.
if shape == "Circular":
    # radius chosen so that the circumference equals M * d, i.e. neighboring
    # microphones are d apart along the arc
    R = pra.circular_2D_array(mic1, M, phi, d * M / (2 * np.pi))
else:
    R = pra.linear_2D_array(mic1, M, phi, d)

# path to samples (directory containing this script)
path = os.path.dirname(__file__)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read(path + "/input_samples/singing_" + str(Fs) + ".wav")
signal1 = np.array(signal1, dtype=float)
signal1 = pra.normalize(signal1)
signal1 = pra.highpass(signal1, Fs)
delay1 = 0.0

# The second signal (interferer) is some german speech
rate2, signal2 = wavfile.read(path + "/input_samples/german_speech_" + str(Fs) + ".wav")
fs = corpus[j].fs else: signal = corpus[j - len(corpus)].data fs = corpus[j - len(corpus)].fs # Add source to 3D room source_position = sources_pos[j] room = pra.Room.from_corners(corners, fs=fs, max_order=8, absorption=0.16 / T60) room.extrude(2.4) room.add_source(source_position, signal=signal) # Add microphones to 3D room R = pra.circular_2D_array(mic_center[:2], M=8, phi0=0, radius=0.1) R = np.concatenate((R, np.ones((1, 8)) * mic_center[2]), axis=0) mics = pra.MicrophoneArray(R, room.fs) room.add_microphone_array(mics) # Compute image sources room.image_source_model() # Simulate the propagation room.compute_rir() room.simulate(snr=SNR) print("MICROPHONES SIGNAL SHAPE FOR SOURCE", j + 1) print("Signal shape for every M:", room.mic_array.signals.shape) # Compute DOA Class
results = [] for coord in all_coords: # set max_order to a low value for a quick (but less accurate) RIR room = pra.Room.from_corners(np_corners, fs=fs, max_order=user_max_order, absorption=user_absorption) # add source and set the signal to WAV file content room.add_source([1., 1.], signal=signal) # in 2-D # add two-microphone array # R = np.array([[3.5, 3.6], [2., 2.]]) # [[x], [y], [z]] # or instead add circular microphone array R = pra.circular_2D_array(center=[2., 2.], M=6, phi0=0, radius=0.1) room.add_microphone_array(pra.MicrophoneArray(R, room.fs)) # compute image sources room.image_source_model(use_libroom=True) fig, ax = room.plot(img_order=6) fig.set_size_inches(16 / 2, 9 / 2) room.plot_rir() fig = plt.gcf() # adjust the figure to show the plots in a viewable manner fig.set_size_inches(10, 5) plt.subplots_adjust(top=0.92, bottom=0.08,