def objective(x, A, sigma, varobj, *args, **kwargs):
    '''
    Residual vector of the energy-based localization cost
    (loss function from the Microsoft group paper by Chen et al.).

    Parameters
    ----------
    x : array_like
        Packed vector of unknowns, split by ``varobj.unpack`` into gains,
        source powers, microphone locations, source locations, alpha and
        scale.
    A : array_like
        Measured log-energies; broadcast against gains along rows and
        powers along columns.
    sigma : array_like
        Element-wise normalisation applied to the residuals.
    varobj : object
        Helper exposing ``unpack(x)``.
    *args, **kwargs
        Accepted and ignored so the signature matches the solver's calls.

    Returns
    -------
    1-D array of the normalised residuals.
    '''
    gains, powers, mic_locs, src_locs, alpha, scale = varobj.unpack(x)

    # log of the squared microphone-to-source distances
    log_sq_dist = np.log(pra.distance(mic_locs, src_locs)**2)

    residual = (A - gains[:, None] - powers[None, :]
                + alpha[0] * log_sq_dist + scale[0])

    return (residual / sigma).ravel()
def partial_rir(room, n, freqvec):
    '''
    Compute the frequency-domain RIR based on the n closest image sources.

    Parameters
    ----------
    room: pyroomacoustics.Room
        The room with sources and microphones
    n: int
        The number of image sources to use
    freqvec: nd-array
        The vector containing all the frequency points to compute

    Returns
    -------
    An nd-array of size M x K x len(freqvec) containing all the transfer
    functions with first index for the microphones, second for the sources,
    third for frequency.
    '''

    M = room.mic_array.R.shape[1]
    K = len(room.sources)
    F = freqvec.shape[0]

    # The `np.complex` alias no longer exists in current NumPy; the builtin
    # `complex` selects the same complex128 dtype on every NumPy version.
    partial_rirs = np.zeros((M, K, F), dtype=complex)

    mic_array_center = np.mean(room.mic_array.R, axis=1)

    for k, source in enumerate(room.sources):

        # set source ordering to nearest to center of microphone array
        source.set_ordering('nearest', ref_point=mic_array_center)

        sources = source[:n]

        # there is most likely a broadcast way of doing this
        for m in range(M):

            # NOTE(review): `c` is a module-level name, presumably the speed
            # of sound -- confirm against the file's imports.
            delays = pra.distance(room.mic_array.R[:, m, np.newaxis],
                                  sources.images) / c

            # sum over image sources of damped, delayed spherical waves
            partial_rirs[m, k, :] = np.sum(
                np.exp(-2j * np.pi * delays * freqvec[:, np.newaxis])
                / (c * delays) * sources.damping[np.newaxis, :],
                axis=1) / (4 * np.pi)

    return partial_rirs
# Draw random source locations inside the room by rejection sampling,
# keeping only candidates that are neither too close to nor too far from
# the microphones, and not too close to already accepted sources.
fp = parameters['floorplan']
lower_corner = [min(fp[0]), min(fp[1]), 0]
upper_corner = [max(fp[0]), max(fp[1]), parameters['height']]
bbox = np.array([lower_corner, upper_corner]).T

n_src_locs = parameters['n_src_locations']  # number of sources
sources_locs = np.zeros((3, 0))

# bounding-box origin and extent as column vectors
bbox_origin = bbox[:, 0, None]
bbox_extent = (bbox[:, 1] - bbox[:, 0])[:, None]

while sources_locs.shape[1] < n_src_locs:

    # uniform candidate in the bounding box
    new_source = np.random.rand(3, 1) * bbox_extent + bbox_origin

    # the candidate must lie inside the room
    is_in_room = room.is_inside(new_source[:, 0])

    # distance to the closest microphone must fall in the allowed range
    mic_dist = pra.distance(mics_locs, new_source).min()
    distance_mic_ok = (
        parameters['dist_src_mic'][0] < mic_dist < parameters['dist_src_mic'][1])

    select = is_in_room and distance_mic_ok

    # keep a minimum spacing to the sources accepted so far
    if sources_locs.shape[1] > 0:
        closest_src = pra.distance(sources_locs, new_source).min()
        distance_src_ok = parameters['min_dist_src_src'] < closest_src
        select = select and distance_src_ok

    if select:
        sources_locs = np.hstack([sources_locs, new_source])

# the source positions are registered in the room as a microphone array
source_array = pra.MicrophoneArray(sources_locs, parameters['fs'])
room.add_microphone_array(source_array)
# Simulate sound transport room.simulate() ############################### ## ENERGY BASED LOCALIZATION ## ############################### ''' Following: ENERGY-BASED POSITION ESTIMATION OF MICROPHONES AND SPEAKERS FOR AD HOC MICROPHONE ARRAYS Chen et al. 2007 ''' alpha_gt = 0.5 # Step 0 : A few groundtruth value to test D2_gt = pra.distance(device_locations, source_locations)**2 A_gt = np.zeros((n_devices, n_speakers)) sigma_gt = np.ones((n_devices, n_speakers)) for i in range(n_devices): for j in range(n_speakers): A_gt[i, j] = device_gains_db[i] - alpha_gt * np.log( D2_gt[i, j]) + source_gains_db[j] # Step 1 : Speaker segmentation A = np.zeros((n_devices, n_speakers)) sigma = np.zeros((n_devices, n_speakers)) for i in range(n_devices): for j in range(n_speakers): lo, hi = [int(x * fs_light) for x in active_segments[j] ] # We have perfect segmentation for now #A[i,j] = np.mean(devices.signals[i][lo:hi] / (20 / np.log(10))) # energy is already in log domain
def computeWeights(self, sources, interferers, R_n, delay=0.03, epsilon=5e-3):
    '''
    Compute time-domain beamforming filters and store them in
    ``self.filters``.

    The filters solve ``(R_xx + K_nq) g = h``, where ``h`` is the column of
    the channel matrix at sample ``int(delay * self.Fs)``, and are then
    normalised so that ``h . g = 1``.

    Parameters
    ----------
    sources : array_like
        Locations of the desired sources (passed to ``pra.distance``).
    interferers : array_like
        Locations of the interfering sources.
    R_n : array_like
        Noise covariance added to the interferer covariance.
    delay : float, optional
        Delay in seconds selecting the constraint column.
    epsilon : float, optional
        Tolerance used to size the time offset around the impulses.

    Returns
    -------
    The ratio of desired-source output power to interferer-plus-noise
    output power for the computed filters.
    '''
    # delays and 1/(4 pi d) attenuations for the desired sources ...
    src_dist = pra.distance(self.R, sources)
    s_time = src_dist / pra.c
    s_dmp = 1. / (4 * np.pi * src_dist)

    # ... and for the interferers
    int_dist = pra.distance(self.R, interferers)
    i_time = int_dist / pra.c
    i_dmp = 1. / (4 * np.pi * int_dist)

    offset = np.maximum(s_dmp.max(), i_dmp.max()) / (np.pi * self.Fs * epsilon)
    t_min = np.minimum(s_time.min(), i_time.min())
    t_max = np.maximum(s_time.max(), i_time.max())

    # shift all delays so the earliest arrival sits `offset` after time zero
    shift = t_min - offset
    s_time = s_time - shift
    i_time = i_time - shift

    Lh = int((t_max - t_min + 2 * offset) * float(self.Fs))
    Lg = self.Lg

    if Lh - 1 > (self.M - 1) * Lg:
        import warnings
        warnings.warn(
            "Beamforming filters length (%d) are shorter than minimum required (%d)." % (
                Lg, Lh),
            UserWarning)

    # the channel matrix: desired sources in columns [0, L), interferers in [L, 2L)
    L = Lg + Lh - 1
    H = np.zeros((Lg * self.M, 2 * L))

    def toeplitz_block(times, gains):
        # summed impulse response of one microphone, laid out as a
        # (Lg x L) Toeplitz convolution block
        ir = pra.lowPassDirac(times[:, np.newaxis], gains[:, np.newaxis],
                              self.Fs, Lh).sum(axis=0)
        first_row = np.pad(ir, (0, L - len(ir)), mode='constant')
        first_col = np.pad(ir[:1], (0, Lg - 1), mode='constant')
        return toeplitz(first_col, first_row)

    for r in range(self.M):
        rows = slice(r * Lg, (r + 1) * Lg)
        H[rows, 0:L] = toeplitz_block(s_time[r], s_dmp[r])
        H[rows, L:2 * L] = toeplitz_block(i_time[r], i_dmp[r])

    # the constraint vector
    kappa = int(delay * self.Fs)
    # kappa = np.minimum(int(0.6*(Lh+Lg)), int(2*t_max*self.Fs))
    h = H[:, kappa]

    # We first assume the sample are uncorrelated
    H_src = H[:, :L]
    H_int = H[:, L:]
    R_xx = np.dot(H_src, H_src.T)
    K_nq = np.dot(H_int, H_int.T) + R_n

    # Compute the TD filters
    chol = la.cho_factor(R_xx + K_nq, check_finite=False)
    g_val = la.cho_solve(chol, h)
    g_val = g_val / np.inner(h, g_val)
    self.filters = g_val.reshape((self.M, Lg))

    # Debug plotting (disabled):
    #   import matplotlib.pyplot as plt
    #   plt.figure()
    #   plt.subplot(2,1,1)
    #   plt.plot(np.arange(L)/float(self.Fs), np.dot(H[:,:L].T, g_val))
    #   plt.plot(np.arange(L)/float(self.Fs), np.dot(H[:,L:].T, g_val))
    #   plt.legend(('Channel of desired source','Channel of interferer'))
    #   plt.subplot(2,1,2)
    #   for m in np.arange(self.M):
    #       plt.plot(np.arange(Lh)/float(self.Fs), H[m*Lg,:Lh])

    # compute and return SNR
    num = np.inner(g_val.T, np.dot(R_xx, g_val))
    denom = np.inner(np.dot(g_val.T, K_nq), g_val)

    return num / denom
def computeWeights(self, sources, interferers, R_n, epsilon=5e-3):
    '''
    Compute time-domain beamforming filters under one linear constraint per
    desired source and store them in ``self.filters``.

    The filters are ``g = C B 1`` with ``C = (K_x + K_nq)^-1 As`` and
    ``B = (As^T C)^-1``, where ``As`` is the constraint matrix built from
    the time-reversed source impulse responses.

    Parameters
    ----------
    sources : array_like
        Locations of the desired sources; ``sources.shape[1]`` is the
        number of constraints.
    interferers : array_like
        Locations of the interfering sources.
    R_n : array_like
        Noise covariance added to the interferer covariance.
    epsilon : float, optional
        Tolerance used to size the time offset around the impulses.

    Returns
    -------
    The ratio of desired-source output power to interferer-plus-noise
    output power for the computed filters.

    Notes
    -----
    As a side effect this opens a matplotlib figure showing the resulting
    channels and the impulse responses.
    '''
    dist_mat = pra.distance(self.R, sources)
    s_time = dist_mat / pra.c
    s_dmp = 1. / (4 * np.pi * dist_mat)

    dist_mat = pra.distance(self.R, interferers)
    i_time = dist_mat / pra.c
    i_dmp = 1. / (4 * np.pi * dist_mat)

    # compute offset needed for decay of sinc by epsilon
    offset = np.maximum(s_dmp.max(), i_dmp.max()) / (np.pi * self.Fs * epsilon)
    t_min = np.minimum(s_time.min(), i_time.min())
    t_max = np.maximum(s_time.max(), i_time.max())

    # adjust timing
    s_time -= t_min - offset
    i_time -= t_min - offset

    # np.ceil returns a float; Lh is used as an array size and slice bound,
    # so it must be an integer.
    Lh = int(np.ceil((t_max - t_min + 2 * offset) * float(self.Fs)))

    # the channel matrix
    K = sources.shape[1]
    Lg = self.Lg
    # integer offset centring the length-Lh response inside a length-Lg filter
    # NOTE(review): negative when Lh > Lg, which would break the slices below.
    off = (Lg - Lh) // 2
    L = self.Lg + Lh - 1

    H = np.zeros((Lg * self.M, 2 * L))
    As = np.zeros((Lg * self.M, K))

    for r in np.arange(self.M):

        # build constraint matrix
        hs = pra.lowPassDirac(s_time[r, :, np.newaxis],
                              s_dmp[r, :, np.newaxis], self.Fs, Lh)[:, ::-1]
        As[r * Lg + off:r * Lg + Lh + off, :] = hs.T

        # build source RIR matrix
        hx = pra.lowPassDirac(s_time[r, :, np.newaxis],
                              s_dmp[r, :, np.newaxis], self.Fs, Lh).sum(axis=0)
        H[r * Lg:(r + 1) * Lg, :L] = pra.convmtx(hx, Lg).T

        # build interferer RIR matrix
        hq = pra.lowPassDirac(i_time[r, :, np.newaxis],
                              i_dmp[r, :, np.newaxis], self.Fs, Lh).sum(axis=0)
        H[r * Lg:(r + 1) * Lg, L:] = pra.convmtx(hq, Lg).T

    ones = np.ones((K, 1))

    # We first assume the sample are uncorrelated
    K_x = np.dot(H[:, :L], H[:, :L].T)
    K_nq = np.dot(H[:, L:], H[:, L:].T) + R_n

    # Compute the TD filters
    # (despite its name, K_nq_inv is the inverse of K_x + K_nq)
    K_nq_inv = np.linalg.inv(K_x + K_nq)
    C = np.dot(K_nq_inv, As)
    B = np.linalg.inv(np.dot(As.T, C))
    g_val = np.dot(C, np.dot(B, ones))
    self.filters = g_val.reshape((self.M, Lg))

    import matplotlib.pyplot as plt
    plt.figure()
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(L) / float(self.Fs), np.dot(H[:, :L].T, g_val))
    plt.plot(np.arange(L) / float(self.Fs), np.dot(H[:, L:].T, g_val))
    plt.legend(('Channel of desired source', 'Channel of interferer'))
    plt.subplot(3, 1, 2)
    for m in np.arange(self.M):
        plt.plot(np.arange(Lh) / float(self.Fs), H[m * Lg, :Lh])
    plt.subplot(3, 1, 3)
    for m in np.arange(self.M):
        plt.plot(np.arange(Lh) / float(self.Fs), H[m * Lg, L:L + Lh])

    # compute and return SNR
    A = np.dot(g_val.T, H[:, :L])
    num = np.dot(A, A.T)
    denom = np.dot(np.dot(g_val.T, K_nq), g_val)

    return num / denom
def computeWeights(self, sources, interferers, R_n, delay=0.03, epsilon=5e-3):
    '''
    Compute time-domain beamforming filters that minimise the
    interferer-plus-noise output power while forcing the desired-source
    channel to a unit impulse, and store them in ``self.filters``.

    With ``A`` the desired-source channel matrix and ``b`` a unit vector at
    index ``kappa``, the filters are
    ``g = K_nq^-1 A (A^T K_nq^-1 A)^-1 b``.

    Parameters
    ----------
    sources : array_like
        Locations of the desired sources.
    interferers : array_like
        Locations of the interfering sources.
    R_n : array_like
        Noise covariance added to the interferer covariance.
    delay : float, optional
        Currently ignored: the impulse position is fixed at
        ``(Lh + Lg) // 2`` samples, as in the original code where the
        delay-based value was immediately overwritten.
    epsilon : float, optional
        Tolerance used to size the time offset around the impulses.

    Returns
    -------
    A 1 x 1 array holding the ratio of desired-source output power to
    interferer-plus-noise output power.
    '''
    dist_mat = pra.distance(self.R, sources)
    s_time = dist_mat / pra.c
    s_dmp = 1. / (4 * np.pi * dist_mat)

    dist_mat = pra.distance(self.R, interferers)
    i_time = dist_mat / pra.c
    i_dmp = 1. / (4 * np.pi * dist_mat)

    # compute offset needed for decay of sinc by epsilon
    offset = np.maximum(s_dmp.max(), i_dmp.max()) / (np.pi * self.Fs * epsilon)
    t_min = np.minimum(s_time.min(), i_time.min())
    t_max = np.maximum(s_time.max(), i_time.max())

    # adjust timing
    s_time -= t_min - offset
    i_time -= t_min - offset

    Lh = int((t_max - t_min + 2 * offset) * float(self.Fs))

    # the channel matrix
    Lg = self.Lg
    L = self.Lg + Lh - 1
    H = np.zeros((Lg * self.M, 2 * L))

    for r in np.arange(self.M):

        # build source RIR matrix
        hx = pra.lowPassDirac(s_time[r, :, np.newaxis],
                              s_dmp[r, :, np.newaxis], self.Fs, Lh).sum(axis=0)
        H[r * Lg:(r + 1) * Lg, :L] = pra.convmtx(hx, Lg).T

        # build interferer RIR matrix
        hq = pra.lowPassDirac(i_time[r, :, np.newaxis],
                              i_dmp[r, :, np.newaxis], self.Fs, Lh).sum(axis=0)
        H[r * Lg:(r + 1) * Lg, L:] = pra.convmtx(hq, Lg).T

    # We first assume the sample are uncorrelated
    K_nq = np.dot(H[:, L:], H[:, L:].T) + R_n

    # constraint: unit impulse in the middle of the overall response.
    # Floor division keeps kappa an integer so it can be used as an index.
    kappa = (Lh + Lg) // 2
    A = H[:, :L]
    b = np.zeros((L, 1))
    b[kappa, 0] = 1

    # filter computation
    # K_nq is needed again for the returned ratio, so it must not be
    # overwritten by the factorization.
    C = la.cho_factor(K_nq, check_finite=False)
    B = la.cho_solve(C, A)
    D = np.dot(A.T, B)
    # D is a temporary, so in-place factorization is safe here
    C = la.cho_factor(D, overwrite_a=True, check_finite=False)
    x = la.cho_solve(C, b)
    g_val = np.dot(B, x)

    # reshape and store
    self.filters = g_val.reshape((self.M, self.Lg))

    # Debug plotting (disabled):
    #   import matplotlib.pyplot as plt
    #   plt.figure()
    #   plt.plot(np.arange(L)/float(self.Fs), np.dot(H[:,:L].T, g_val))
    #   plt.plot(np.arange(L)/float(self.Fs), np.dot(H[:,L:].T, g_val))
    #   plt.legend(('Channel of desired source','Channel of interferer'))

    # compute and return SNR
    A = np.dot(g_val.T, H[:, :L])
    num = np.dot(A, A.T)
    denom = np.dot(np.dot(g_val.T, K_nq), g_val)

    return num / denom
def computeWeights(self, sources, interferers, R_n, delay=None, epsilon=5e-3):
    '''
    Compute time-domain beamforming filters maximising the generalized
    Rayleigh quotient ``g^T K_s g / g^T K_nq g`` and store them in
    ``self.filters``.

    Parameters
    ----------
    sources : array_like
        Locations of the desired sources.
    interferers : array_like
        Locations of the interfering sources.
    R_n : array_like
        Noise covariance added to the interferer covariance.
    delay : optional
        Not used by this implementation.
    epsilon : float, optional
        Tolerance used to size the time offset around the impulses.

    Returns
    -------
    The largest generalized eigenvalue of ``(K_s, K_nq)``, i.e. the
    optimal value of the quotient.
    '''
    dist_mat = pra.distance(self.R, sources)
    s_time = dist_mat / pra.c
    s_dmp = 1. / (4 * np.pi * dist_mat)

    dist_mat = pra.distance(self.R, interferers)
    i_time = dist_mat / pra.c
    i_dmp = 1. / (4 * np.pi * dist_mat)

    # compute offset needed for decay of sinc by epsilon
    offset = np.maximum(s_dmp.max(), i_dmp.max()) / (np.pi * self.Fs * epsilon)
    t_min = np.minimum(s_time.min(), i_time.min())
    t_max = np.maximum(s_time.max(), i_time.max())

    # adjust timing
    s_time -= t_min - offset
    i_time -= t_min - offset

    Lh = int((t_max - t_min + 2 * offset) * float(self.Fs))

    # the channel matrix
    Lg = self.Lg
    L = self.Lg + Lh - 1
    H = np.zeros((Lg * self.M, 2 * L))

    for r in np.arange(self.M):

        # build source RIR matrix
        hx = pra.lowPassDirac(s_time[r, :, np.newaxis],
                              s_dmp[r, :, np.newaxis], self.Fs, Lh).sum(axis=0)
        H[r * Lg:(r + 1) * Lg, :L] = pra.convmtx(hx, Lg).T

        # build interferer RIR matrix
        hq = pra.lowPassDirac(i_time[r, :, np.newaxis],
                              i_dmp[r, :, np.newaxis], self.Fs, Lh).sum(axis=0)
        H[r * Lg:(r + 1) * Lg, L:] = pra.convmtx(hq, Lg).T

    # We first assume the sample are uncorrelated
    K_s = np.dot(H[:, :L], H[:, :L].T)
    K_nq = np.dot(H[:, L:], H[:, L:].T) + R_n

    # Compute TD filters using generalized Rayleigh coefficient maximization:
    # only the largest eigenpair (last index, ascending order) is requested.
    top = self.M * Lg - 1
    try:
        SINR, v = la.eigh(K_s, b=K_nq, subset_by_index=[top, top],
                          overwrite_a=True, overwrite_b=True,
                          check_finite=False)
    except TypeError:
        # older SciPy releases only know the legacy `eigvals` keyword
        SINR, v = la.eigh(K_s, b=K_nq, eigvals=(top, top),
                          overwrite_a=True, overwrite_b=True,
                          check_finite=False)
    g_val = np.real(v[:, 0])

    self.filters = g_val.reshape((self.M, Lg))

    # Debug plotting (disabled):
    #   import matplotlib.pyplot as plt
    #   plt.figure()
    #   plt.plot(np.arange(L)/float(self.Fs), np.dot(H[:,:L].T, g_val))
    #   plt.plot(np.arange(L)/float(self.Fs), np.dot(H[:,L:].T, g_val))
    #   plt.legend(('Channel of desired source','Channel of interferer'))

    # compute and return SNR
    return SINR[0]
def computeWeights(self, sources, interferers, R_n, delay=0.03, epsilon=5e-3):
    '''
    Compute time-domain beamforming filters that maximise the energy of the
    desired-source response inside a window starting at ``delay`` relative
    to everything else, and store them in ``self.filters``.

    The desired-source channel matrix is split column-wise into the part
    before ``kappa = int(delay * Fs)``, a 30 ms window starting at
    ``kappa``, and the remainder. The filters maximise the generalized
    Rayleigh quotient of the window energy over the sum of the other two
    parts, the interferer channel and ``R_n``.

    Parameters
    ----------
    sources : array_like
        Locations of the desired sources.
    interferers : array_like
        Locations of the interfering sources.
    R_n : array_like
        Noise covariance added to the denominator matrix.
    delay : float, optional
        Start of the window in seconds.
    epsilon : float, optional
        Tolerance used to size the time offset around the impulses.

    Returns
    -------
    The largest generalized eigenvalue, i.e. the optimal value of the
    quotient.

    Notes
    -----
    As a side effect this opens a matplotlib figure with the resulting
    channels and their spectra.
    '''
    dist_mat = pra.distance(self.R, sources)
    s_time = dist_mat / pra.c
    s_dmp = 1. / (4 * np.pi * dist_mat)

    dist_mat = pra.distance(self.R, interferers)
    i_time = dist_mat / pra.c
    i_dmp = 1. / (4 * np.pi * dist_mat)

    # compute offset needed for decay of sinc by epsilon
    offset = np.maximum(s_dmp.max(), i_dmp.max()) / (np.pi * self.Fs * epsilon)
    t_min = np.minimum(s_time.min(), i_time.min())
    t_max = np.maximum(s_time.max(), i_time.max())

    # adjust timing
    s_time -= t_min - offset
    i_time -= t_min - offset

    Lh = int((t_max - t_min + 2 * offset) * float(self.Fs))

    # the channel matrix
    Lg = self.Lg
    L = self.Lg + Lh - 1
    H = np.zeros((Lg * self.M, 2 * L))

    for r in np.arange(self.M):

        # build source RIR matrix
        hx = pra.lowPassDirac(s_time[r, :, np.newaxis],
                              s_dmp[r, :, np.newaxis], self.Fs, Lh).sum(axis=0)
        H[r * Lg:(r + 1) * Lg, :L] = pra.convmtx(hx, Lg).T

        # build interferer RIR matrix
        hq = pra.lowPassDirac(i_time[r, :, np.newaxis],
                              i_dmp[r, :, np.newaxis], self.Fs, Lh).sum(axis=0)
        H[r * Lg:(r + 1) * Lg, L:] = pra.convmtx(hq, Lg).T

    # Delay of the system in samples
    kappa = int(delay * self.Fs)
    precedence = int(0.030 * self.Fs)

    # the constraint: columns before the window, inside it, and after it
    n = np.minimum(L, kappa + precedence)
    Hnc = H[:, :kappa]
    Hpr = H[:, kappa:n]
    Hc = H[:, n:L]

    A = np.dot(Hpr, Hpr.T)
    B = np.dot(Hnc, Hnc.T) + np.dot(Hc, Hc.T) + np.dot(
        H[:, L:], H[:, L:].T) + R_n

    # solve the problem: only the largest eigenpair (last index, ascending
    # order) is requested.
    top = self.M * Lg - 1
    try:
        SINR, v = la.eigh(A, b=B, subset_by_index=[top, top],
                          overwrite_a=True, overwrite_b=True,
                          check_finite=False)
    except TypeError:
        # older SciPy releases only know the legacy `eigvals` keyword
        SINR, v = la.eigh(A, b=B, eigvals=(top, top),
                          overwrite_a=True, overwrite_b=True,
                          check_finite=False)
    g_val = np.real(v[:, 0])

    # reshape and store
    self.filters = g_val.reshape((self.M, self.Lg))

    import matplotlib.pyplot as plt
    plt.figure()
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(L) / float(self.Fs), np.dot(H[:, :L].T, g_val))
    plt.plot(np.arange(L) / float(self.Fs), np.dot(H[:, L:].T, g_val))
    plt.legend(('Channel of desired source', 'Channel of interferer'))
    plt.subplot(3, 1, 2)
    plt.plot(np.abs(np.fft.rfft(np.dot(H[:, :L].T, g_val))))
    plt.subplot(3, 1, 3)
    for m in np.arange(self.M):
        plt.plot(np.abs(np.fft.rfft(H[m * self.Lg, :L])))

    # compute and return SNR
    return SINR[0]
def energy_localization(A, sigma, mics_locations, n_iter=100, verbose=False):
    '''
    Energy based localization

    Parameters
    ----------
    A : array_like (n_microphones, n_sources)
        A matrix containing the log-energy of the sources at given sensors.
        A[m,k] contains the energy of the k-th source at the m-th
        microphone. NOTE: this array is modified in place during the
        initialization stage.
    sigma : array_like (n_microphones, n_sources)
        A matrix containing the noise standard deviations.
    mics_locations : array_like (n_dim, n_microphones)
        The location of the microphones
    n_iter : int, optional
        Number of restarts of the non-linear least-squares solver; from the
        second restart on, the previous solution is perturbed with noise.
    verbose : bool, optional
        Printout stuff

    Returns
    -------
    m : The (log) gains of the microphones
    s : The source powers
    X : The locations of the sources (lowest-cost solution found)
    X0 : The initial source locations obtained from the SRLS stage
    '''

    # Rows of A index microphones and columns index sources, consistent with
    # the check against mics_locations below.
    n_mics, n_sources = A.shape

    assert A.shape == sigma.shape, 'A and sigma should have the same shape'
    assert mics_locations.shape[1] == A.shape[0], 'The number of rows in A should be the same as the number of microphones'

    n_dim = mics_locations.shape[0]

    assert n_dim == 2 or n_dim == 3, 'Only 2D and 3D support'

    # Step 1 : Initialization
    #########################

    var_shapes = [(n_mics), (n_sources), (n_dim, n_mics), (n_dim, n_sources), (1,), (1,)]
    packer = VarPacker(var_shapes)
    x0 = packer.new_vector()
    m0, s0, R0, X0, alpha0, scale0 = packer.unpack(x0)

    alpha0[:] = 0.5

    C = np.zeros((n_mics, n_sources))  # log attenuation of speaker at microphone
    D2 = np.zeros((n_mics, n_sources))  # squared distance between speaker and microphone

    # NOTE(review): A[j,i] and A[j,j] are read for j < n_sources, which seems
    # to assume n_mics >= n_sources with the first n_sources microphones
    # paired with the sources -- confirm against the callers.
    for i in range(n_mics):
        for j in range(n_sources):
            if i < n_sources:
                C[i,j] = 0.5 * (A[i,j] + A[j,i] - A[i,i] - A[j,j])
            else:
                C[i,j] = A[i,j] - A[j,j]

            D2[i,j] = np.exp(-C[i,j] / alpha0[0])  # In practice, we'll need to find a way to fix the scale

    # log gain of device
    m0[0] = 0.  # i.e. m[0] = log(1)
    for i in range(1,n_mics):
        m0[i] = A[i,0] - A[0,0] - C[i,0] + m0[0]

    # energy of speaker
    for j in range(n_sources):
        s0[j] = np.mean(A[:,j] - m0[:] - C[:,j])

    # STEP 2 : SRLS for initial estimate of microphone locations
    ###########################################################

    # Fix microphone locations
    R0[:,:] = mics_locations

    # We can do some alternating optimization here
    # by increasing the number of loops
    # NOTE(review): `scale` is assigned here and in the loop but is rebound
    # by packer.unpack before it is ever read.
    scale = 1.
    pre_n_iter = 3
    for i in range(pre_n_iter):

        # Use SRLS to find initial guess of locations
        scalings = np.zeros(n_sources)
        for j in range(n_sources):
            X0[:,j], scalings[j] = rescaled_SRLS(R0.T, np.ones(n_mics), D2[:,j,None])
            # NOTE(review): this shifts *all* of A, in place, for every
            # source -- confirm this is intended rather than A[:,j].
            A[:,:] += alpha0[0] * np.log(scalings[j])

        scale = np.sqrt(np.median(scalings))

        # Reinitialize gains from the SRLS distances
        S = A + alpha0[0] * np.log(pra.distance(R0, X0)**2)
        m0[:], s0[:] = cdm_unfolding(1 / sigma**2, S, sum_matrix=True)

        D2 = np.exp((- A + m0[:,None] + s0[None,:]) )

    scale0[:] = 0.

    # STEP 4 : Non-linear least-squares
    ###################################

    # Create a variable to loop over
    x = packer.new_vector()
    m, s, R, X, alpha, scale = packer.unpack(x)
    x[:] = x0

    # keep track of minimum
    x_opt = packer.new_vector()
    cost_opt = np.inf

    for i in range(n_iter):

        # noise injection
        if i > 0:
            m[:] += np.random.randn(*m.shape) * 0.01 * np.std(m)
            s[:] += np.random.randn(*s.shape) * 0.01 * np.std(s)
            X[:,:] += np.random.randn(*X.shape) * 0.30 * np.std(X)

        # Non-linear least squares solver
        res_1 = least_squares(objective, x,
                jac=jacobian,
                args=(A, sigma, packer),
                kwargs={'fix_mic' : True, 'fix_alpha' : False, 'fix_scale' : True},
                xtol=1e-15,
                ftol=1e-15,
                max_nfev=100,
                method='lm',
                verbose=verbose,
                )

        if res_1.cost < cost_opt:
            x_opt[:] = res_1.x
            cost_opt = res_1.cost

        # use result as next initialization
        x[:] = res_1.x

    m, s, R, X, alpha, scale = packer.unpack(x_opt)

    return m, s, X, X0
# Draw random source locations inside the room by rejection sampling,
# keeping only candidates that are neither too close to nor too far from
# the microphones, and not too close to already accepted sources.
lower_corner = [min(fp[0]), min(fp[1]), 0]
upper_corner = [max(fp[0]), max(fp[1]), height]
bbox = np.array([lower_corner, upper_corner]).T

n_src_locs = n_src_locations  # number of sources
sources_locs = np.zeros((3, 0))

# bounding-box origin and extent as column vectors
bbox_origin = bbox[:, 0, None]
bbox_extent = (bbox[:, 1] - bbox[:, 0])[:, None]

while sources_locs.shape[1] < n_src_locs:

    # uniform candidate in the bounding box
    new_source = np.random.rand(3, 1) * bbox_extent + bbox_origin

    # the candidate must lie inside the room
    is_in_room = room.is_inside(new_source[:, 0])

    # distance to the closest microphone must fall in the allowed range
    mic_dist = pra.distance(mics_locs, new_source).min()
    distance_mic_ok = dist_src_mic[0] < mic_dist < dist_src_mic[1]

    select = is_in_room and distance_mic_ok

    # keep a minimum spacing to the sources accepted so far
    if sources_locs.shape[1] > 0:
        closest_src = pra.distance(sources_locs, new_source).min()
        distance_src_ok = min_dist_src_src < closest_src
        select = select and distance_src_ok

    if select:
        sources_locs = np.hstack([sources_locs, new_source])

# distance between the first two accepted sources
print('Source distances',
      np.linalg.norm(sources_locs[:, 0] - sources_locs[:, 1]))