import csv
import os
from glob import glob
from pathlib import Path

import librosa
import matplotlib.pyplot as plt
import numpy as np
import parmap
import scipy.signal as ss
from tqdm import tqdm

# NOTE: audioread/audiowrite are assumed to come from a local I/O utility
# module (e.g. DNS-Challenge-style audiolib helpers); adjust this import to
# match the repository layout.
from audiolib import audioread, audiowrite


def split_tr_tt_demand(self):
    path_DEMAND = '/media/jeonghwan/HDD2/Dataset/DEMAND/'
    save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/demand_noise/tr/'
    # channel 1 of each recording was selected to make the diffuse noise
    noise_list = glob(path_DEMAND + '**/*ch01.wav')
    for i, noi in enumerate(noise_list):
        aud, fs = librosa.core.load(noi, sr=None, mono=False)
        aud = aud[np.newaxis, :]
        # stack channels 2-4 under channel 1 -> (4, n_samples)
        for j in range(1, 4):
            aud_temp, _ = librosa.core.load(
                noi.replace('ch01', 'ch0{}'.format(j + 1)), sr=None, mono=False)
            aud = np.concatenate((aud, aud_temp[np.newaxis, :]), axis=0)
        fn = noi.split('/')[-2] + '_' + noi.split('/')[-1]
        # first 4/5 of each file for training (tr), last 1/5 for test (tt)
        len_tr = int(aud.shape[1] * 4 / 5)
        noi_tr = aud[:, :len_tr]
        noi_tt = aud[:, len_tr:]
        audiowrite(save_path + fn, noi_tr.T, fs)
        audiowrite(save_path.replace('/tr/', '/tt/') + fn, noi_tt.T, fs)
def mix_rir_and_sound_source_210301(self):
    # --------- single-source dataset --------- #
    save_path = '/media/jeonghwan/HDD2/IS2021/dataset/SSL/tt/'
    rir_path = '/media/jeonghwan/HDD2/IS2021/dataset/simulated_RIR/tr/anechoic/'
    spc_path = '/media/jeonghwan/HDD1/Dataset/MS-SNSD/clean_test/'
    rir_list = glob(rir_path + '*.npz')
    spc_list = glob(spc_path + '*.wav')
    spc_list.sort()
    for i, _spc in enumerate(tqdm(spc_list)):
        # read audio file; librosa returns (channels, samples) for
        # multi-channel input, so keep only the first channel
        aud, fs = librosa.core.load(_spc, sr=None, mono=False)
        if len(aud.shape) != 1:
            aud = aud[0]
        # pick a random RIR for each utterance
        idx_s = np.random.randint(0, len(rir_list))
        npz = np.load(rir_list[idx_s], allow_pickle=True)
        rir = npz['rir']
        # convolve the mono source with the multi-channel RIR
        Y = ss.convolve(rir, aud[:, np.newaxis])
        # splitext keeps the '.0' in distances like 'r2.0'
        audiowrite(
            save_path + os.path.splitext(rir_list[idx_s].split('/')[-1])[0]
            + '_' + _spc.split('/')[-1], Y, fs)
def convolve_and_save_rir(self, fn):
    save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/multi_channel_speech/tt/'
    rir_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/simulated_RIR/anechoic/tr/'
    mix_all = False
    rir_list = glob(rir_path + '*.npz')
    # read audio file; librosa returns (channels, samples) for multi-channel
    # input, so keep only the first channel
    aud, fs = librosa.core.load(fn, sr=None, mono=False)
    if len(aud.shape) != 1:
        aud = aud[0]
    if mix_all:
        # convolve the utterance with every RIR
        for i, _rir in enumerate(rir_list):
            npz = np.load(_rir, allow_pickle=True)
            rir = npz['rir']
            Y = ss.convolve(rir, aud[:, np.newaxis])
            # splitext keeps the '.0' in distances like 'r2.0'
            audiowrite(
                save_path + os.path.splitext(_rir.split('/')[-1])[0]
                + '_' + fn.split('/')[-1], Y, fs)
    else:
        # convolve with one randomly selected RIR
        idx_s = np.random.randint(0, len(rir_list))
        npz = np.load(rir_list[idx_s], allow_pickle=True)
        rir = npz['rir']
        Y = ss.convolve(rir, aud[:, np.newaxis])
        audiowrite(
            save_path + rir_list[idx_s].split('/')[-1].split('.n')[0]
            + '_' + fn.split('/')[-1], Y, fs)
def convolve_and_save_rir_tt(self, fn):
    # path set
    mode = 'tt'
    save_path = os.getcwd() + '/multi_channel_speech/' + mode + '/clean'
    Path(save_path).mkdir(parents=True, exist_ok=True)
    rir_path = os.getcwd() + '/rir/' + mode
    mix_all = False
    rir_list = glob(rir_path + '/*/*.npz')
    # read audio file; keep only the first channel of multi-channel input
    aud, fs = audioread(fn)
    if len(aud.shape) != 1:
        aud = aud[:, 0]
    if mix_all:
        # convolve the utterance with every RIR
        for i, _rir in enumerate(rir_list):
            npz = np.load(_rir, allow_pickle=True)
            rir = npz['rir']
            Y = ss.convolve(rir, aud[:, np.newaxis])
            # splitext keeps the '.0' in distances like 'r2.0'
            audiowrite(
                save_path + '/' + os.path.splitext(_rir.split('/')[-1])[0]
                + '_' + fn.split('/')[-1], Y, fs)
    else:
        # convolve with one randomly selected RIR; prefix the room directory
        idx_s = np.random.randint(0, len(rir_list))
        npz = np.load(rir_list[idx_s], allow_pickle=True)
        rir = npz['rir']
        Y = ss.convolve(rir, aud[:, np.newaxis])
        audiowrite(
            save_path + '/' + rir_list[idx_s].split('/')[-2]
            + '_' + rir_list[idx_s].split('/')[-1].split('.n')[0]
            + '_' + fn.split('/')[-1], Y, fs)
def mix_rir_and_sound_source(self, mode):
    """
    Convolve speech with a randomly selected speech RIR.
    :param mode: tr/cv/tt
    :return: saves multi-channel speech
    """
    # path set (cv shares the tr RIRs)
    save_path = os.getcwd() + '/multi_channel_speech/' + mode
    rir_path = os.getcwd() + '/rir/' + mode
    if mode == 'cv':
        rir_path = os.getcwd() + '/rir/tr'
    spc_path = '/home/dail/PycharmProjects/DCCRN/data/tr/clean'
    # RIR list and sound-source list
    rir_list = glob(rir_path + '/*/*.npz')
    spc_list = glob(spc_path + '/*.wav')
    spc_list.sort()
    _use_par = False
    if _use_par:
        # parallelize over utterances with parmap (see the note below on the
        # per-mode worker functions)
        if mode == 'tr':
            _ = parmap.map(self.convolve_and_save_rir_tr, spc_list,
                           pm_pbar=True, pm_processes=28)
        elif mode == 'cv':
            _ = parmap.map(self.convolve_and_save_rir_cv, spc_list,
                           pm_pbar=True, pm_processes=28)
        elif mode == 'tt':
            _ = parmap.map(self.convolve_and_save_rir_tt, spc_list,
                           pm_pbar=True, pm_processes=28)
    else:
        for i, _spc in enumerate(tqdm(spc_list)):
            # read audio file; keep only the first channel
            aud, fs = audioread(_spc)
            if len(aud.shape) != 1:
                aud = aud[:, 0]
            # pick a random RIR and convolve
            idx_s = np.random.randint(0, len(rir_list))
            npz = np.load(rir_list[idx_s], allow_pickle=True)
            rir = npz['rir']
            Y = ss.convolve(rir, aud[:, np.newaxis])
            audiowrite(
                save_path + '/' + rir_list[idx_s].split('/')[-2]
                + '_' + rir_list[idx_s].split('/')[-1].split('.n')[0]
                + '_' + _spc.split('/')[-1], Y, fs)
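# NOTE: `convolve_and_save_rir_tr` and `convolve_and_save_rir_cv`, used by the
# parmap branch above, are not defined in this section; they are assumed to
# mirror `convolve_and_save_rir_tt` with `mode` set to 'tr' / 'cv'.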
def generate_whitenoise(self):
    # uniform white noise in [-0.5, 0.5) (rand, not randn, so the signal is
    # bounded as the '[0, 1]' comment intends), peak-normalized to 0.5
    wn = np.random.rand(self.fs * 5, 1) - 0.5
    wn = wn / np.max(np.abs(wn)) * 0.5
    plt.figure()
    plt.plot(wn)
    plt.show()
    audiowrite('wn.wav', wn, self.fs)
    return wn
def mix_spatially_white_noise(self, fn, SNR):
    """
    Add spatially uncorrelated (sensor) white noise to a 4-channel file.
    :param fn: audio filename
    :param SNR: SNR label used in the output filename
    """
    # read the multi-channel signal; audioread is assumed to return
    # (samples, channels)
    Y, fs = audioread(fn)
    # independent white noise per channel, added at a fixed small gain
    swn = np.random.randn(Y.shape[0], 4) - 0.5
    Y = Y + swn * 0.002
    audiowrite('mixed_{}dB.wav'.format(SNR), Y, fs)
    return Y
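# The fixed 0.002 gain in `mix_spatially_white_noise` ignores the requested
# SNR. A minimal sketch (an assumption, not part of the original code) of how
# the gain could instead be chosen so the sensor noise sits `SNR` dB below
# the signal:
def _white_noise_gain(self, Y, swn, SNR):
    """Return g such that 10*log10(mean(Y**2) / mean((g*swn)**2)) == SNR."""
    p_sig = np.mean(Y ** 2)
    p_noise = np.mean(swn ** 2)
    return np.sqrt(p_sig / (p_noise * 10.0 ** (SNR / 10.0)))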
def convolve_and_save_rir_mp(self, fn):
    """Multiprocessing worker: fn is a (speech_path, rir_path) pair."""
    save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/multi_channel_speech/tt/'
    # other targets used for this worker:
    #   .../multi_channel_speech/tr/
    #   .../multi_channel_noisy_direct/tr/ and .../tt/
    # read audio file; librosa returns (channels, samples), keep channel 0
    aud, fs = librosa.core.load(fn[0], sr=None, mono=False)
    if len(aud.shape) != 1:
        aud = aud[0]
    room_num = fn[1].split('/')[-2]
    # convolve with the paired RIR
    npz = np.load(fn[1], allow_pickle=True)
    rir = npz['rir']
    Y = ss.convolve(rir, aud[:, np.newaxis])
    audiowrite(
        save_path + fn[1].split('/')[-1].split('.n')[0]
        + '_' + room_num + '_' + fn[0].split('/')[-1], Y, fs)
def split_tr_tt_diffuse(self):
    path_DEMAND = '/media/jeonghwan/HDD2/Dataset/DEMAND/'
    save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/diffuse_noise/tr/'
    # channel 1 of each recording was selected to make the diffuse noise
    noise_list = glob(path_DEMAND + '**/*ch01.wav')
    for i, noi in enumerate(noise_list):
        aud, fs = librosa.core.load(noi, sr=None, mono=False)
        fn = noi.split('/')[-2] + '_' + noi.split('/')[-1]
        # first 4/5 for training (tr), last 1/5 for test (tt)
        len_tr = int(aud.shape[0] * 4 / 5)
        noi_tr = aud[:len_tr]
        noi_tt = aud[len_tr:]
        audiowrite(save_path + fn, noi_tr, fs)
        audiowrite(save_path.replace('/tr/', '/tt/') + fn, noi_tt, fs)
def generate_single_source(self):
    # read the white-noise probe written by generate_whitenoise()
    aud, fs = audioread('wn.wav')
    # load the RIR for a fixed source position (az 150 deg, el 0 deg, r 2.0 m)
    npz_path = './'
    fn = 'az150_el0_r2.0'
    npz_list = glob(npz_path + '{}.npz'.format(fn))
    npz = np.load(npz_list[0], allow_pickle=True)
    rir = npz['rir']
    Y = self.convolve_rir_signal(rir, aud)
    # save the multi-channel result
    audiowrite('{}.wav'.format(fn), Y, fs)
    return Y
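# `convolve_rir_signal` (used above) is not defined in this section. A minimal
# sketch of its assumed behaviour, mirroring the ss.convolve calls elsewhere
# in this file; treat the name and signature as assumptions:
def convolve_rir_signal(self, rir, aud):
    """Convolve a mono signal with a multi-channel RIR -> (samples, channels)."""
    if len(aud.shape) != 1:
        aud = aud[:, 0]  # keep only the first channel
    return ss.convolve(rir, aud[:, np.newaxis])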
def mix_spc_noi_tt(self):
    """
    (1) load single-channel speech and single-channel noise
    (2) select a speech RIR and a noise RIR
    (3) convolve speech/noise with the speech/noise RIR
    (4) mix the multi-channel speech and multi-channel noise
    (5) each speech file yields room(2) x noise(4) x SNR(5) mixtures
    :return: save noisy(mix), clean(s1), noise(s2) files and 'output.csv'
    """
    # path set
    spc_path = '/home/dail/PycharmProjects/DCCRN/datasets/tr/clean'
    noi_path = '/home/dail/PycharmProjects/DCCRN/datasets/tr/noise'
    snr_list = [-5, 0, 5, 10, 15]
    save_path = os.getcwd() + '/output/tt'
    Path(save_path + '/mix').mkdir(parents=True, exist_ok=True)
    Path(save_path + '/s1').mkdir(parents=True, exist_ok=True)
    Path(save_path + '/s2').mkdir(parents=True, exist_ok=True)
    # single-channel speech and noise lists
    s_list = glob(spc_path + '/*.wav')
    n_list = glob(noi_path + '/*.wav')
    # make 'output.csv'
    f = open('output/tt/output.csv', 'w', newline='')
    wr = csv.writer(f)
    wr.writerow(['order', 'speech', 'room', 'speech_rir',
                 'noise', 'noise_rir', 'snr'])
    cnt = 0
    for i, s in enumerate(s_list):
        spc, fs = audioread(s)
        spc_na = os.path.splitext(os.path.basename(s))[0]
        # speech azimuth in the frontal +/-30 deg arc, noise azimuth in the
        # 180-270 deg arc, shared random distance
        rand_azi_s = np.random.choice(
            np.concatenate((np.arange(31), np.arange(330, 360)), axis=0))
        rand_azi_n = np.random.choice(np.arange(180, 271))
        rand_r = np.round(np.random.choice(np.linspace(1, 2.2, 5)), 1)
        spc_rir_na = f'az{rand_azi_s}_el0_r{rand_r}'
        noi_rir_na = f'az{rand_azi_n}_el0_r{rand_r}'
        room = ['R4', 'R5']
        # room
        for n in range(2):
            spc_rir = os.getcwd() + f'/rir/tt/{room[n]}/{spc_rir_na}.npz'
            npz_s = np.load(spc_rir, allow_pickle=True)
            rir_s = npz_s['rir']
            multi_ch_spc = ss.convolve(rir_s, spc[:, np.newaxis])
            multi_ch_spc = multi_ch_spc.transpose()  # -> (channels, samples)
            noi_rir = os.getcwd() + f'/rir/tt/{room[n]}/{noi_rir_na}.npz'
            npz_n = np.load(noi_rir, allow_pickle=True)
            rir_n = npz_n['rir']
            # noise
            for idx_n in range(len(n_list)):
                noi, fs2 = librosa.core.load(n_list[idx_n], sr=None)
                noi_na = os.path.splitext(os.path.basename(n_list[idx_n]))[0]
                assert fs == fs2
                # 8191 = assumed RIR length (8192 taps) minus one, so the
                # trimmed convolution matches the convolved speech length
                rand_start = np.random.randint(
                    0, noi.shape[0] - multi_ch_spc.shape[1] - 8191)
                multi_ch_noi_tmp = ss.convolve(
                    rir_n, noi[rand_start:rand_start + multi_ch_spc.shape[1]
                               + 8191, np.newaxis])
                multi_ch_noi = multi_ch_noi_tmp[8191:-8191, :].transpose()
                # mix speech and noise at every SNR
                for snr in snr_list:
                    cnt += 1
                    noisy, clean, noise = self.snr_mix(
                        multi_ch_spc, multi_ch_noi, snr)
                    audiowrite(
                        save_path + f'/mix/noisy_{cnt:05d}_{noi_na}_{snr}.wav',
                        noisy.transpose(), fs)
                    audiowrite(save_path + f'/s1/clean_{cnt:05d}.wav',
                               clean.transpose(), fs)
                    audiowrite(save_path + f'/s2/noise_{cnt:05d}.wav',
                               noise.transpose(), fs)
                    wr.writerow([cnt, spc_na, room[n], spc_rir_na,
                                 noi_na, noi_rir_na, snr])
    f.close()
def mix_spc_noi_0401(self, mode):
    """
    (1) load multi-channel speech
    (2) derive the noise RIR (= speech RIR rotated by 90/180/270 degrees)
    (3) convolve a randomly selected noise with the noise RIR
    (4) mix multi-channel speech and noise at a randomly selected SNR
    :param mode: tr/cv/tt
    :return: save noisy(mix), clean(s1), noise(s2) files and 'output.csv'
    """
    # path set
    spc_path = os.getcwd() + '/multi_channel_speech/' + mode
    noi_path = os.getcwd() + '/Data/' + mode + '/noise'
    snr_list = [-5, 0, 5, 10, 15, 20]
    save_path = os.getcwd() + '/output/' + mode
    Path(save_path + '/mix').mkdir(parents=True, exist_ok=True)
    Path(save_path + '/s1').mkdir(parents=True, exist_ok=True)
    Path(save_path + '/s2').mkdir(parents=True, exist_ok=True)
    # multi-channel speech list and single-channel noise list
    s_list = glob(spc_path + '/*.wav')
    n_list = glob(noi_path + '/*.wav')
    # make 'output.csv'
    f = open(f'output/{mode}/output.csv', 'w', newline='')
    wr = csv.writer(f)
    wr.writerow(['order', 'speech', 'room', 'speech_rir',
                 'noise', 'noise_rir', 'azimuth', 'snr'])
    for i, s in enumerate(s_list):
        multi_ch_aud, fs = librosa.core.load(s, sr=None, mono=False)
        multi_ch_aud_na = os.path.splitext(os.path.basename(s))[0]
        # parse 'room_azXXX_elX_rX.X_..._speech'-style filenames
        split = multi_ch_aud_na.split('_')
        spc_na = f'{split[-2]}_{split[-1]}'
        spc_rir_na = f'{split[1]}_{split[2]}_{split[3]}'
        # noise RIR = speech RIR rotated by +90/+180/+270 degrees
        az = int(split[1][2:])
        room = split[0]
        n = np.random.randint(1, 4)
        noi_az = (az + 90 * n) % 360
        noi_rir_na = f'az{noi_az}_{split[2]}_{split[3]}'
        noi_rir = os.getcwd() + f'/rir/{mode}/{room}/{noi_rir_na}.npz'
        if mode == 'cv':
            # cv shares the tr RIRs
            noi_rir = os.getcwd() + f'/rir/tr/{room}/{noi_rir_na}.npz'
        # select and load a random noise
        idx_n = np.random.randint(0, len(n_list))
        noi, fs2 = librosa.core.load(n_list[idx_n], sr=None)
        noi_na = os.path.splitext(os.path.basename(n_list[idx_n]))[0]
        assert fs == fs2
        # convolve noise with the RIR; 8191 = assumed RIR length (8192 taps)
        # minus one, so the trimmed noise matches the speech length
        npz = np.load(noi_rir, allow_pickle=True)
        rir = npz['rir']
        rand_start = np.random.randint(
            0, noi.shape[0] - multi_ch_aud.shape[1] - 8191)
        multi_ch_noi_tmp = ss.convolve(
            rir, noi[rand_start:rand_start + multi_ch_aud.shape[1]
                     + 8191, np.newaxis])
        multi_ch_noi = multi_ch_noi_tmp[8191:-8191, :].transpose()
        # mix speech and noise at a random SNR
        idx_snr = np.random.randint(0, len(snr_list))
        snr = snr_list[idx_snr]
        noisy, clean, noise = self.snr_mix(multi_ch_aud, multi_ch_noi, snr)
        audiowrite(save_path + f'/mix/noisy_{i + 1:05d}_{noi_na}_{snr}.wav',
                   noisy.transpose(), fs)
        audiowrite(save_path + f'/s1/clean_{i + 1:05d}.wav',
                   clean.transpose(), fs)
        audiowrite(save_path + f'/s2/noise_{i + 1:05d}.wav',
                   noise.transpose(), fs)
        wr.writerow([i + 1, spc_na, room, spc_rir_na,
                     noi_na, noi_rir_na, 90 * n, snr])
    f.close()
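# `snr_mix`, called by both mixing routines above, is not defined in this
# section. A minimal sketch under the assumption that it scales the noise so
# the (channels, samples) mixture reaches the requested SNR and returns
# (noisy, clean, noise); not the original implementation:
def snr_mix(self, clean, noise, snr):
    """Mix speech and noise at `snr` dB (both shaped (channels, samples))."""
    p_clean = np.mean(clean ** 2)
    p_noise = np.mean(noise ** 2)
    # gain that puts the noise power `snr` dB below the speech power
    g = np.sqrt(p_clean / (p_noise * 10.0 ** (snr / 10.0) + 1e-12))
    noise = noise * g
    noisy = clean + noise
    return noisy, clean, noise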