def split_frame_m():
    c = 343  # speed of sound (m/s)
    frame_count = 2048
    data, fs = load_audio_data(
        r'D:\projects\pyprojects\soundphase\calib\0\0.wav', 'wav')
    skip_time = int(fs * 1)
    data = data[skip_time:, :-1].T
    # search grid of candidate directions on the unit sphere
    level = 4
    grid: np.ndarray = np.load(rf'grid/{level}.npz')['grid']
    # microphone positions
    pos = cons_uca(0.043)
    for i in range(0, data.shape[1], frame_count):
        data_seg = data[:, i:i + frame_count]
        # crude noise gate, not a proper noise check
        if np.max(abs(fft(data_seg[0] / len(data_seg[0])))) < 1:
            continue
        print('time: ', (skip_time + i) / fs)
        t1 = time.time()
        pair_tau, stack_fft = calculate_stack_fft_and_pairs_tau(
            data_seg, pos, grid, c)
        t2 = time.time()
        print('precomputation time consumption: ', t2 - t1)
        t1 = time.time()
        E = srp_phat_m(data_seg, pos, stack_fft, pair_tau, fs)
        # E = srp_phat_previous_tau(data_seg, pos, grid, pairs_tau, fs)
        t2 = time.time()
        print('srp_phat time consumption: ', t2 - t1)
        sdevc = grid[np.argmax(E, axis=1)]  # source direction vector
        # print(sdevc)
        print('angle of max val: ', np.rad2deg(vec2theta(sdevc)))
        print('=' * 50)
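
# NOTE: srp_phat_m and calculate_stack_fft_and_pairs_tau are defined elsewhere
# in the repo. As a reference for what the loop above is timing, here is a
# minimal, self-contained sketch of the SRP-PHAT core: PHAT-whitened
# cross-spectra summed over mic pairs, steered by far-field delays toward each
# grid direction. The function name, argument layout, and sign convention are
# assumptions for illustration, not the repo's actual implementation.
def srp_phat_sketch(frames, mic_pos, grid, fs, c=343.0):
    """frames: (n_mics, n_samples); mic_pos: (n_mics, 3) in metres;
    grid: (n_points, 3) unit direction vectors. Returns the steered
    response power E of shape (n_points,). A sketch, not the repo's code."""
    n_mics, n = frames.shape
    X = np.fft.rfft(frames, axis=1)
    X /= np.abs(X) + 1e-12                      # PHAT weighting
    freqs = np.fft.rfftfreq(n, d=1.0 / fs)
    E = np.zeros(len(grid))
    for p in range(n_mics):
        for q in range(p + 1, n_mics):
            cross = X[p] * np.conj(X[q])
            # far-field TDOA of each candidate direction for this pair
            tau = grid @ (mic_pos[p] - mic_pos[q]) / c
            steer = np.exp(2j * np.pi * np.outer(tau, freqs))
            E += np.real(steer @ cross)
    return E
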
def split_frame():
    c = 343  # speed of sound (m/s)
    frame_count = 1024
    data, fs = load_audio_data(
        r'D:\projects\pyprojects\gesturerecord\location\sound\0.wav', 'wav')
    skip_time = int(fs * 1)
    data = data[skip_time:, :-1].T
    # search grid on the unit sphere (northern hemisphere only)
    level = 3
    grid: np.ndarray = np.load(rf'grid/{level}_north.npz')['grid']
    # microphone positions
    pos = cons_uca(0.043)
    # precompute tau
    # pairs_tau = calculate_pairs_tau(pos, grid, c)
    for i in range(0, data.shape[1], frame_count):
        data_seg = data[:, i:i + frame_count]
        # crude noise gate, not a proper noise check
        if np.max(abs(fft(data_seg[0] / len(data_seg[0])))) < 10:
            continue
        print('time: ', (skip_time + i) / fs)
        t1 = time.time()
        E = srp_phat(data_seg, pos, grid, c, fs)
        # E = srp_phat_muti_thread(data, pos, grid, c, fs)
        # E = srp_phat_previous_tau(data_seg, pos, grid, pairs_tau, fs)
        t2 = time.time()
        print('srp_phat time consumption: ', t2 - t1)
        sdevc = grid[np.argmax(E, axis=1)]  # source direction vector
        # print(sdevc)
        print('angle of max val: ', np.rad2deg(vec2theta(sdevc)))
        print('=' * 50)
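
# NOTE: cons_uca(0.043) is called throughout this file but defined elsewhere.
# A plausible reading, judging by the 7 channels kept from the 8-channel
# capture, is a uniform circular array with a centre microphone; the element
# count and ordering below are assumptions, not the repo's confirmed layout.
def cons_uca_sketch(radius, n_ring=6, center_mic=True):
    """Hypothetical stand-in for cons_uca: (n_mics, 3) coordinates in metres,
    a ring of n_ring mics of radius `radius` in the z=0 plane, centre mic
    first. The real array may order or orient the mics differently."""
    angles = 2 * np.pi * np.arange(n_ring) / n_ring
    ring = np.stack([radius * np.cos(angles),
                     radius * np.sin(angles),
                     np.zeros(n_ring)], axis=1)
    return np.vstack([np.zeros((1, 3)), ring]) if center_mic else ring
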
def srp_fft_denoise_test():
    frame_len = 2048
    # analysis window
    window = np.hanning(frame_len)
    # window = np.ones(frame_len)
    # window = np.hamming(frame_len)
    data, fs = load_audio_data(
        r'D:\projects\pyprojects\gesturerecord\location\20khz\0.wav', 'wav')
    data = data[fs * 1:, :-1].T
    data_filter = butter_bandpass_filter(data, 19e3, 21e3)
    # for f in data_filter:
    #     normalized_signal_fft(f)
    t_fs_n = 40000  # noise-only frame offset (samples)
    noise_data = data_filter[:, t_fs_n:t_fs_n + frame_len]
    t_fs_t = 48000 * 5 + 1000  # test frame offset (samples)
    test_data = data_filter[:, t_fs_t:t_fs_t + frame_len]
    # denoise_fft(fft(test_data) / frame_len, fft(noise_data) / frame_len)
    mic_array_pos = cons_uca(0.043)
    c = 343
    level = 4
    grid = np.load(rf'grid/{level}_north.npz')['grid']
    noise_fft = fft(noise_data * window)
    E = srp_phat_denoise(test_data, mic_array_pos, grid, c, fs,
                         noise_fft, window)
    sdevc = grid[np.argmax(E, axis=1)]  # source direction vector
    # print(sdevc)
    print('angle of max val: ', np.rad2deg(vec2theta(sdevc)))
    print('=' * 50)
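
# NOTE: srp_phat_denoise is defined elsewhere in the repo. The per-frame
# denoising it is assumed to perform on the windowed spectra is basic spectral
# subtraction, sketched below; the repo's actual weighting may differ.
def denoise_fft_sketch(sig_fft, noise_fft, floor=0.0):
    """Subtract a noise magnitude estimate from the signal spectrum while
    keeping the signal's phase (an assumption, not the repo's exact method)."""
    mag = np.maximum(np.abs(sig_fft) - np.abs(noise_fft), floor)
    return mag * np.exp(1j * np.angle(sig_fft))
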
def extract_phasedata_from_audio_special_for_onemic(audio_file,
                                                    phasedata_save_file,
                                                    audio_type='wav',
                                                    mic_array=True):
    origin_data, fs = load_audio_data(audio_file, audio_type)  # fs: sampling rate
    data = origin_data.reshape((-1, 8))
    data = data.T  # shape = (num_of_channels, all_frames)
    data = data[:, int(fs * DELAY_TIME):]
    mic_num = 0  # use a single mic only
    data = data[mic_num, :]
    data = data.reshape((1, -1))
    # start processing the data
    phase_list = []
    for i in range(NUM_OF_FREQ):
        fc = F0 + i * STEP
        data_filter = butter_bandpass_filter(data, fc - 150, fc + 150)
        I_raw, Q_raw = get_cos_IQ_raw(data_filter, fc, fs)
        # filter + downsample
        I = move_average_overlap_filter(I_raw)
        Q = move_average_overlap_filter(Q_raw)
        # denoise: keep only the trend component
        decompositionQ = seasonal_decompose(Q.T, period=10, two_sided=False)
        trendQ = decompositionQ.trend
        decompositionI = seasonal_decompose(I.T, period=10, two_sided=False)
        trendI = decompositionI.trend
        trendQ = trendQ.T
        trendI = trendI.T
        assert trendI.shape == trendQ.shape
        if len(trendI.shape) == 1:
            trendI = trendI.reshape((1, -1))
            trendQ = trendQ.reshape((1, -1))
        # drop the NaN head produced by the one-sided decomposition
        trendQ = trendQ[:, 10:]
        trendI = trendI[:, 10:]
        phase = get_phase(trendI, trendQ)  # unwrapping has little effect so far
        # plt.plot(phase[0])
        # plt.show()
        assert phase.shape[1] > 1
        # use the first and second differences
        phase_list.append(np.diff(phase)[:, :-1])
        # plt.plot(np.diff(phase).reshape(-1))
        # plt.show()
        phase_list.append(np.diff(np.diff(phase)))
    merged_u_p = np.array(phase_list).reshape((NUM_OF_FREQ * 1 * 2, -1))
    print(merged_u_p.shape)
    # flatten for compressed saving
    flattened_m_u_p = merged_u_p.flatten()
    # lengths differ between files, so they cannot be stacked together
    # np.savetxt(dataset_save_file, flattened_m_u_p.reshape(1, -1))
    np.savez_compressed(phasedata_save_file, phasedata=flattened_m_u_p)
    return 1
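
# NOTE: get_cos_IQ_raw and get_phase are defined elsewhere. Their assumed
# behaviour, used as a mental model for the pipeline above, is standard
# coherent I/Q demodulation followed by phase unwrapping. Any extra scaling
# or initial carrier phase in the repo's versions is not captured here.
def get_cos_IQ_raw_sketch(frames, fc, fs):
    """Assumed: mix each channel of `frames` (n_ch, n) with a cosine/sine
    carrier at fc (Hz) to get baseband I/Q. A sketch, not the repo's code."""
    t = np.arange(frames.shape[1]) / fs
    I = frames * np.cos(2 * np.pi * fc * t)    # in-phase component
    Q = -frames * np.sin(2 * np.pi * fc * t)   # quadrature component
    return I, Q


def get_phase_sketch(I, Q):
    """Assumed: per-channel unwrapped phase of the baseband signal I + jQ."""
    return np.unwrap(np.arctan2(Q, I), axis=1)
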
def compare():
    LENG = 2048
    data, fs = load_audio_data(
        r'D:\projects\pyprojects\gesturerecord\location\sound\0.wav', 'wav')
    # data = butter_bandpass_filter(data.T, 15e3, 23e3)
    data = data.T
    t = 3
    data = data[:-1, int(fs * t):int(fs * t) + LENG]
    # for i, d in enumerate(data):
    #     plt.subplot(4, 2, i + 1)
    #     plt.plot(d)
    # plt.show()
    i = 0
    j = 1
    a = data[i]
    b = data[j]
    y = gcc_phat(a, b)
    print('ifft max ccor val: ', np.max(y))
    print('ifft delay of sample num: ', np.argmax(y))
    plt.figure()
    plt.plot(y)
    plt.title('ifft gcc')
    mic_array_pos = cons_uca(0.043)
    c = 343
    level = 4
    grid = np.load(rf'grid/{level}.npz')['grid']
    tau = get_steering_vector(mic_array_pos[i], mic_array_pos[j], c, grid)
    R = gcc_phat_search(a, b, fs, tau)
    # ray plot of the angular spectrum
    # plot_angspect(R[0], grid, percentile=99)
    # r = R[0]
    # sorted_arg = np.argsort(r)[::-1]
    # print(r[sorted_arg])
    print('gccphat search max val: ', np.max(R))
    print('gccphat delay of sample num: ', tau[np.argmax(R)] * fs)
    max_p = grid[np.argmax(R)]
    print('point of max val: ', max_p)
    print('angle of max val: ', np.rad2deg(vec2theta([max_p])))
    plt.figure()
    plt.plot(R.reshape(-1))
    plt.title('gcc search')
    # plt.figure()
    # plt.plot(np.correlate(a, b, 'full'))
    plt.show()
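
# NOTE: compare() contrasts the IFFT form of GCC-PHAT with a grid search over
# fractional delays. Both helpers live elsewhere in the repo; minimal sketches
# of what they are assumed to compute follow. Names, epsilon guard, and sign
# conventions are illustrative assumptions.
def gcc_phat_sketch(a, b, n_fft=None):
    """Assumed gcc_phat: IFFT of the phase-whitened cross spectrum. The peak
    index gives the delay of `a` relative to `b` in samples, subject to FFT
    wrap-around, which this sketch does not unfold."""
    n = n_fft or (len(a) + len(b))
    cross = np.fft.rfft(a, n=n) * np.conj(np.fft.rfft(b, n=n))
    cross /= np.abs(cross) + 1e-12              # PHAT weighting
    return np.fft.irfft(cross, n=n)


def gcc_phat_search_sketch(a, b, fs, tau):
    """Assumed gcc_phat_search: evaluate the whitened cross spectrum directly
    at the fractional delays `tau` (seconds), so the search can follow an
    arbitrary direction grid instead of integer-sample lags."""
    n = len(a)
    cross = np.fft.rfft(a) * np.conj(np.fft.rfft(b))
    cross /= np.abs(cross) + 1e-12
    freqs = np.fft.rfftfreq(n, d=1.0 / fs)
    steer = np.exp(2j * np.pi * np.outer(tau, freqs))
    return np.real(steer @ cross)
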
def generate_training_data_pcm(audio_file, dataset_save_file):
    origin_data, fs = load_audio_data(audio_file, 'pcm')
    nchannels = 1  # number of channels
    t = 0
    f0 = 17350
    for win in range(CHUNK, len(origin_data), CHUNK):
        # read the next chunk
        data = origin_data[win - CHUNK:win + CHUNK]
        t = t + CHUNK / fs
        # print(f"time:{t}s")
        # the microphone needs time to settle, so skip the first DELAY_TIME seconds
        if t < DELAY_TIME:
            continue
        unwrapped_phase_list = []
        data = data.reshape((-1, nchannels))
        data = data.T  # shape = (num_of_channels, 2 * CHUNK)
        if data.shape[1] < 2 * CHUNK:
            continue
        # process the chunk; this could be optimized and still needs validation
        for i in range(NUM_OF_FREQ):
            fc = f0 + i * STEP
            data_filter = butter_bandpass_filter(data, fc - 250, fc + 250)
            I, Q = get_cos_IQ(data_filter, fc, fs)
            unwrapped_phase = get_phase(I, Q)  # unwrapping has little effect so far
            # plt.plot(unwrapped_phase[0])
            # plt.show()
            # use the standard deviation to decide whether there is motion
            assert unwrapped_phase.shape[1] > 1
            u_p_stds = np.std(unwrapped_phase, axis=1)
            print(fc, np.mean(u_p_stds))
            if np.mean(u_p_stds) > STD_THRESHOLD:
                unwrapped_phase_list.append(unwrapped_phase)
        # merge the 8 frequency bands into one matrix,
        # shape = (num_of_channels * NUM_OF_FREQ, CHUNK)
        if len(unwrapped_phase_list) != NUM_OF_FREQ:
            continue
        print(f"time:{t}s")
        merged_u_p = np.vstack(unwrapped_phase_list)
        # flatten for saving
        flattened_m_u_p = merged_u_p.flatten()
        with open(dataset_save_file, 'ab') as f:
            np.savetxt(f, flattened_m_u_p.reshape(1, -1))
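
# NOTE: butter_bandpass_filter is used throughout with (data, low, high)
# arguments but defined elsewhere. A plausible implementation, sketched with
# scipy, is a zero-phase Butterworth band-pass along the last axis; the
# default sample rate and filter order here are assumptions.
from scipy.signal import butter, filtfilt


def butter_bandpass_filter_sketch(data, lo, hi, fs=48000, order=5):
    """Hypothetical stand-in for butter_bandpass_filter: band-pass `data`
    (..., n_samples) between lo and hi Hz with zero phase distortion."""
    b, a = butter(order, [lo / (fs / 2), hi / (fs / 2)], btype='band')
    return filtfilt(b, a, data, axis=-1)
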
def fft_denoise_test():
    frame_len = 2048
    # analysis window
    window = np.hanning(frame_len)
    # window = np.ones(frame_len)
    # window = np.hamming(frame_len)
    data, fs = load_audio_data(
        r'D:\projects\pyprojects\gesturerecord\location\20khz\0.wav', 'wav')
    data = data[fs * 1:, :-1].T
    data_filter = butter_bandpass_filter(data, 19e3, 21e3)
    # for f in data_filter:
    #     normalized_signal_fft(f)
    t_fs_n = 40000  # noise-only frame offset (samples)
    noise_data = data_filter[:, t_fs_n:t_fs_n + frame_len]
    t_fs_t = 48000 * 5 + 1000  # test frame offset (samples)
    test_data = data_filter[:, t_fs_t:t_fs_t + frame_len]
    # denoise_fft(fft(test_data) / frame_len, fft(noise_data) / frame_len)
    i = 0
    j = 1
    mic_array_pos = cons_uca(0.043)
    c = 343
    level = 4
    grid = np.load(rf'grid/{level}.npz')['grid']
    tau = get_steering_vector(mic_array_pos[i], mic_array_pos[j], c, grid)
    noise_fft = fft(noise_data * window)
    R = gcc_phat_search_fft_denoise(test_data[i], test_data[j], fs, tau,
                                    noise_fft[i], noise_fft[j], window)
    print('gccphat search max val: ', np.max(R))
    print('gccphat delay of sample num: ', tau[np.argmax(R)] * fs)
    max_p = grid[np.argmax(R)]
    print('point of max val: ', max_p)
    print('angle of max val: ', np.rad2deg(vec2theta([max_p])))
    plt.figure()
    plt.plot(R.reshape(-1))
    plt.title('gcc search')
    # plt.figure()
    # plt.plot(np.correlate(a, b, 'full'))
    plt.show()
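
# NOTE: get_steering_vector (used here and in compare()) is defined elsewhere.
# Its assumed behaviour is the far-field TDOA for one mic pair over the
# direction grid; the sign convention below is a guess.
def get_steering_vector_sketch(pos_i, pos_j, c, grid):
    """Assumed: projection of the pair baseline onto each candidate unit
    direction in `grid` (n_points, 3), divided by the speed of sound,
    giving one delay in seconds per grid point."""
    return grid @ (np.asarray(pos_i) - np.asarray(pos_j)) / c
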
def beamform_on_raw_audio_data(filename):
    data, fs = load_audio_data(filename, 'wav')
    data = data.T
    data = data[:7, int(fs * DELAY_TIME):]
    a_angle = 240  # azimuth (degrees)
    e_angle = 0    # elevation (degrees)
    two_d_angle = [[np.deg2rad(a_angle), np.deg2rad(e_angle)]]
    c = 343
    spacing = 0.043
    mic_array_pos = cons_uca(spacing)
    sd = steering_plane_wave(mic_array_pos, c, two_d_angle)
    beamformed_data = beamform_real(data, sd).reshape(1, -1)
    beamformed_data_2 = ump_8_beamform(data, 48000, two_d_angle).reshape(1, -1)
    phase_list = []
    for i in range(NUM_OF_FREQ):
        fc = F0 + i * STEP
        data_filter = butter_bandpass_filter(data, fc - 150, fc + 150)
        I_raw, Q_raw = get_cos_IQ_raw(data_filter, fc, fs)
        # filter + downsample
        I = move_average_overlap_filter(I_raw)
        Q = move_average_overlap_filter(Q_raw)
        decompositionQ = seasonal_decompose(Q.T, period=10, two_sided=False)
        trendQ = decompositionQ.trend
        decompositionI = seasonal_decompose(I.T, period=10, two_sided=False)
        trendI = decompositionI.trend
        trendQ = trendQ.T
        trendI = trendI.T
        # trendQ = Q
        # trendI = I
        assert trendI.shape == trendQ.shape
        if len(trendI.shape) == 1:
            trendI = trendI.reshape((1, -1))
            trendQ = trendQ.reshape((1, -1))
        # trendQ = trendQ[:, 10:]
        # trendI = trendI[:, 10:]
        trendQ = Q  # note: this overrides the decomposed trend with the raw I/Q
        trendI = I
        # draw_circle(trendI[0], trendQ[0])
        raw_phase = get_phase(trendI, trendQ)

        # beamform on the phase
        noise = np.mean(raw_phase[:, 20:60], axis=1).reshape(-1, 1)
        print(noise.shape)
        raw_phase_denoised = raw_phase - noise
        bphase_denoised = beamform_real(raw_phase_denoised, sd).reshape(1, -1)

        def normalize_max_min(x):
            x_max = np.max(x)
            x_min = np.min(x)
            return (x - x_min) / (x_max - x_min)

        # compute the phase of the beamformed signal
        beamformed_data_filter = butter_bandpass_filter(
            beamformed_data, fc - 150, fc + 150)
        bI_raw, bQ_raw = get_cos_IQ_raw(beamformed_data_filter, fc, fs)
        bI = move_average_overlap_filter(bI_raw)
        bQ = move_average_overlap_filter(bQ_raw)
        bphase = get_phase(bI, bQ)

        beamformed_data_filter = butter_bandpass_filter(
            beamformed_data_2, fc - 150, fc + 150)
        bI_raw, bQ_raw = get_cos_IQ_raw(beamformed_data_filter, fc, fs)
        bI = move_average_overlap_filter(bI_raw)
        bQ = move_average_overlap_filter(bQ_raw)
        bphase_2 = get_phase(bI, bQ)

        plt.figure()
        plt.subplot(2, 1, 1)
        plt.plot(raw_phase[0])
        plt.subplot(2, 1, 2)
        plt.plot(bphase_denoised[0])
        plt.show()
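
# NOTE: beamform_real is defined elsewhere. A plausible reading, used as a
# reference for the comparison above, is frequency-domain delay-and-sum with
# the steering delays `sd`; the sign of the shift and the normalization are
# assumptions that may differ from the repo's version.
def beamform_real_sketch(frames, sd, fs=48000):
    """Hypothetical delay-and-sum: shift each channel of `frames` (n_ch, n)
    by its steering delay (seconds) in the frequency domain, then average
    across channels to form one beamformed signal."""
    n = frames.shape[1]
    freqs = np.fft.rfftfreq(n, d=1.0 / fs)
    X = np.fft.rfft(frames, axis=1)
    shifts = np.exp(2j * np.pi * np.outer(np.ravel(sd), freqs))
    return np.fft.irfft((X * shifts).mean(axis=0), n=n)
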
def beamform_after_IQ(filename, start, dur):
    data, fs = load_audio_data(filename, 'wav')
    data = data.T
    data = data[:7, int(fs * DELAY_TIME):]
    # data = data[:7, start:start + dur]
    phase_list = []
    for i in range(NUM_OF_FREQ):
        fc = F0 + i * STEP
        data_filter = butter_bandpass_filter(data, fc - 150, fc + 150)
        I_raw, Q_raw = get_cos_IQ_raw(data_filter, fc, fs)
        # filter + downsample
        I = move_average_overlap_filter(I_raw)
        Q = move_average_overlap_filter(Q_raw)
        # I = butter_lowpass_filter(I_raw, 150)
        # Q = butter_lowpass_filter(Q_raw, 150)
        # I = I[:, 5:-5]
        # Q = Q[:, 5:-5]
        # plt.plot(I[0][5:-5])
        # plt.plot(Q[0][5:-5])
        # plt.show()
        # denoise
        decompositionQ = seasonal_decompose(Q.T, period=10, two_sided=False)
        trendQ = decompositionQ.trend
        decompositionI = seasonal_decompose(I.T, period=10, two_sided=False)
        trendI = decompositionI.trend
        trendQ = trendQ.T
        trendI = trendI.T
        # trendQ = Q
        # trendI = I
        assert trendI.shape == trendQ.shape
        if len(trendI.shape) == 1:
            trendI = trendI.reshape((1, -1))
            trendQ = trendQ.reshape((1, -1))
        # trendQ = trendQ[:, 10:]
        # trendI = trendI[:, 10:]
        trendQ = Q  # note: this overrides the decomposed trend with the raw I/Q
        trendI = I
        # draw_circle(trendI[0], trendQ[0])
        raw_phase = get_phase(trendI, trendQ)
        exp_phase = trendI + 1j * trendQ  # complex baseband signal

        # noise removal by subtracting the mean (experimental)
        mean_noise = np.mean(raw_phase[:1000], axis=1)

        # beamform
        a_angle = 120  # azimuth (degrees)
        e_angle = 0    # elevation (degrees)
        two_d_angle = [[np.deg2rad(a_angle), np.deg2rad(e_angle)]]
        c = 343
        spacing = 0.043
        mic_array_pos = cons_uca(spacing)
        # sp = music(data, mic_array_pos, fc, c, np.arange(0, 360), np.arange(0, 30), 1)
        # # plt.plot(sp.reshape(-1))
        # plt.pcolormesh(sp)
        # plt.show()
        azimuth = (0, 360)
        elevation = (0, 90)
        # beamscan_spectrum = np.zeros((azimuth[1] - azimuth[0], elevation[1] - elevation[0]))
        # # scan over all (azimuth, elevation) pairs
        # for angle_1 in range(azimuth[0], azimuth[1]):
        #     for angle_2 in range(elevation[0], elevation[1]):
        #         two_angle = [[np.deg2rad(angle_1), np.deg2rad(angle_2)]]
        #         sd = steering_plane_wave(mic_array_pos, c, two_angle)
        #         adjust = np.exp(-1j * 2 * np.pi * fc * sd)
        #         syn_signals = exp_phase * adjust.T
        #         # syn_signals = np.real(syn_signals)
        #         beamformed_signal = np.sum(syn_signals, axis=0)
        #         beamscan_spectrum[angle_1][angle_2] = np.sum(abs(beamformed_signal))
        # # return beamscan_spectrum
        # # plt.pcolormesh(beamscan_spectrum)
        # # plt.show()
        # plt.plot(beamscan_spectrum[:, 0])
        # plt.grid()
        # plt.show()
        sd = steering_plane_wave(mic_array_pos, c, two_d_angle)
        adjust = np.exp(-1j * 2 * np.pi * fc * sd).T
        assert exp_phase.shape[0] == adjust.shape[0]
        syn_signals = exp_phase * adjust
        beamformed_signal = np.sum(syn_signals, axis=0).reshape(1, -1)
        phase = get_phase(np.real(beamformed_signal), np.imag(beamformed_signal))
        plt.figure()
        plt.subplot(2, 1, 1)
        plt.plot(raw_phase[0])
        plt.subplot(2, 1, 2)
        plt.plot(phase[0])
        plt.show()
        phase_list.append(phase)
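
# NOTE: steering_plane_wave is defined elsewhere. From the way `sd` is used
# above (adjust = exp(-1j * 2 * pi * fc * sd).T matched against the channel
# axis), it appears to return one plane-wave delay per (look direction, mic).
# The sketch below is that assumed behaviour; the sign convention and the
# exact spherical parameterization are guesses.
def steering_plane_wave_sketch(mic_pos, c, angles):
    """Assumed: `angles` is a list of [azimuth, elevation] pairs in radians;
    returns (n_angles, n_mics) delays in seconds for a far-field plane wave."""
    angles = np.asarray(angles)
    az, el = angles[:, 0], angles[:, 1]
    # unit propagation vectors, one per look direction
    u = np.stack([np.cos(el) * np.cos(az),
                  np.cos(el) * np.sin(az),
                  np.sin(el)], axis=1)
    return -(u @ np.asarray(mic_pos).T) / c
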
def extract_magndata_from_beamformed_audio(audio_file, phasedata_save_file,
                                           audio_type='pcm', mic_array=False):
    origin_data, fs = load_audio_data(audio_file, audio_type)  # fs: sampling rate
    # already reshaped once upstream; why reshape again?
    if mic_array:
        data = origin_data.reshape((-1, N_CHANNELS + 1))
    else:
        data = origin_data.reshape((-1, N_CHANNELS))
    data = data.T  # shape = (num_of_channels, all_frames)
    data = data[:, int(fs * DELAY_TIME):]
    if mic_array:
        # drop the eighth channel
        data = data[:7, :]
        # beamform; the look angle is still to be decided
        data = ump_8_beamform(data, fs, angel=[[np.pi * 4 / 3, 0]])
    assert data.shape[0] == 1
    # start processing the data
    magnti_list = []
    for i in range(NUM_OF_FREQ):
        fc = F0 + i * STEP
        data_filter = butter_bandpass_filter(data, fc - 150, fc + 150)
        I_raw, Q_raw = get_cos_IQ_raw(data_filter, fc, fs)
        # filter + downsample
        I = move_average_overlap_filter(I_raw)
        Q = move_average_overlap_filter(Q_raw)
        # denoise: keep only the trend component
        decompositionQ = seasonal_decompose(Q.T, period=10, two_sided=False)
        trendQ = decompositionQ.trend
        decompositionI = seasonal_decompose(I.T, period=10, two_sided=False)
        trendI = decompositionI.trend
        trendQ = trendQ.T
        trendI = trendI.T
        assert trendI.shape == trendQ.shape
        if len(trendI.shape) == 1:
            trendI = trendI.reshape((1, -1))
            trendQ = trendQ.reshape((1, -1))
        # drop the NaN head produced by the one-sided decomposition
        trendQ = trendQ[:, 10:]
        trendI = trendI[:, 10:]
        magnti = get_magnitude(trendI, trendQ)
        # plt.plot(np.diff(magnti[0].reshape(-1)))
        # plt.show()
        assert magnti.shape[1] > 1
        # use the first and second differences
        magnti_list.append(np.diff(magnti)[:, :-1])
        # plt.plot(np.diff(magnti).reshape(-1))
        # plt.show()
        magnti_list.append(np.diff(np.diff(magnti)))
    merged_u_p = np.array(magnti_list).reshape((NUM_OF_FREQ * 1 * 2, -1))
    print(merged_u_p.shape)
    # flatten for compressed saving
    flattened_m_u_p = merged_u_p.flatten()
    # lengths differ between files, so they cannot be stacked together
    # np.savetxt(dataset_save_file, flattened_m_u_p.reshape(1, -1))
    np.savez_compressed(phasedata_save_file, phasedata=flattened_m_u_p)
    return 1
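
# NOTE: get_magnitude is defined elsewhere; its assumed behaviour is the
# envelope of the complex baseband signal I + jQ, sketched here for reference.
def get_magnitude_sketch(I, Q):
    """Assumed: per-sample magnitude of I + jQ (signal envelope)."""
    return np.sqrt(I ** 2 + Q ** 2)
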
def extract_phasedata_from_audio(audio_file, phasedata_save_file,
                                 audio_type='pcm', mic_array=False):
    '''
    Currently uses the first difference of the phase.
    :param audio_file:
    :param phasedata_save_file:
    :param audio_type:
    :param mic_array:
    :return:
    '''
    origin_data, fs = load_audio_data(audio_file, audio_type)  # fs: sampling rate
    # data = origin_data[int(fs * DELAY_TIME):]
    # data = data.reshape((-1, N_CHANNELS))
    # data = data.T  # shape = (num_of_channels, all_frames)
    if mic_array:
        data = origin_data.reshape((-1, N_CHANNELS + 1))
    else:
        data = origin_data.reshape((-1, N_CHANNELS))
    data = data.T  # shape = (num_of_channels, all_frames)
    data = data[:, int(fs * DELAY_TIME):]
    if mic_array:
        # drop the eighth channel
        data = data[:7, :]
    # start processing the data
    unwrapped_phase_list = []
    for i in range(NUM_OF_FREQ):
        fc = F0 + i * STEP
        data_filter = butter_bandpass_filter(data, fc - 150, fc + 150)
        I_raw, Q_raw = get_cos_IQ_raw(data_filter, fc, fs)
        # filter + downsample
        I = move_average_overlap_filter(I_raw)
        Q = move_average_overlap_filter(Q_raw)
        # denoise: keep only the trend component
        decompositionQ = seasonal_decompose(Q.T, period=10, two_sided=False)
        trendQ = decompositionQ.trend
        decompositionI = seasonal_decompose(I.T, period=10, two_sided=False)
        trendI = decompositionI.trend
        trendQ = trendQ.T
        trendI = trendI.T
        assert trendI.shape == trendQ.shape
        if len(trendI.shape) == 1:
            trendI = trendI.reshape((1, -1))
            trendQ = trendQ.reshape((1, -1))
        # drop the NaN head produced by the one-sided decomposition
        trendQ = trendQ[:, 10:]
        trendI = trendI[:, 10:]
        unwrapped_phase = get_phase(trendI, trendQ)  # unwrapping has little effect so far
        # plt.plot(unwrapped_phase[0])
        # plt.show()
        assert unwrapped_phase.shape[1] > 1
        # first difference (the second difference is currently disabled)
        unwrapped_phase_list.append(np.diff(unwrapped_phase)[:, :-1])
        # plt.plot(np.diff(unwrapped_phase).reshape(-1))
        # plt.show()
        # unwrapped_phase_list.append(np.diff(np.diff(unwrapped_phase)))
    merged_u_p = np.array(unwrapped_phase_list).reshape(
        (NUM_OF_FREQ * N_CHANNELS * 1, -1))
    print(merged_u_p.shape)
    # flatten for compressed saving
    flattened_m_u_p = merged_u_p.flatten()
    # lengths differ between files, so they cannot be stacked together
    # np.savetxt(dataset_save_file, flattened_m_u_p.reshape(1, -1))
    np.savez_compressed(phasedata_save_file, phasedata=flattened_m_u_p)
    return N_CHANNELS
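
# NOTE: move_average_overlap_filter, used in every I/Q pipeline above, is
# defined elsewhere. One plausible reading, sketched below, is a moving
# average over overlapping windows that both low-pass filters and downsamples
# the I/Q streams; the window and hop sizes here are pure assumptions.
def move_average_overlap_filter_sketch(x, win=200, hop=100):
    """Hypothetical stand-in: mean over `win`-sample windows advanced by
    `hop` samples (50% overlap), applied per channel of x (n_ch, n)."""
    n_out = (x.shape[1] - win) // hop + 1
    out = np.empty((x.shape[0], n_out))
    for k in range(n_out):
        out[:, k] = x[:, k * hop:k * hop + win].mean(axis=1)
    return out
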