def prepare_training_data(chime_data_dir, dest_dir, suffix_id):
    """Export mask-estimation training targets for the 'tr' and 'dt' stages.

    For every utterance returned by ``gen_flist_simu`` the clean and noise
    signals are loaded with ``get_audio_data``, transformed with ``stft`` and
    handed to ``estimate_IBM``. The two returned masks and the magnitude of
    the summed spectra are pickled (as float32) to
    ``<dest_dir>/<stage>/<utterance name>``. The relative paths of all
    exported files are written to
    ``<dest_dir>/flist_<stage>_<suffix_id>.json``.

    Args:
        chime_data_dir: Forwarded unchanged to ``gen_flist_simu``.
        dest_dir: Output root; one sub-directory per stage is created.
        suffix_id: Forwarded to ``gen_flist_simu`` and embedded in the name
            of the JSON file list.
    """
    for stage in ('tr', 'dt'):
        list_path, utterances = gen_flist_simu(
            chime_data_dir, stage, suffix_id)
        exported = []
        mkdir_p(os.path.join(dest_dir, stage))

        progress = tqdm.tqdm(
            utterances, desc='Generating data for {}'.format(list_path))
        for utt in progress:
            speech = get_audio_data(utt, '_clean')
            noise = get_audio_data(utt, '_noise')

            # Swap the first two axes of each STFT result before masking.
            speech_spec = stft(speech, time_dim=1).transpose((1, 0, 2))
            noise_spec = stft(noise, time_dim=1).transpose((1, 0, 2))
            speech_mask, noise_mask = estimate_IBM(speech_spec, noise_spec)

            targets = {
                'IBM_X': speech_mask,
                'IBM_N': noise_mask,
                'Y_abs': np.abs(speech_spec + noise_spec),
            }
            payload = {
                key: value.astype(np.float32)
                for key, value in targets.items()
            }

            # Exported files are named after the last path component.
            utt_name = utt.split('/')[-1]
            with open(os.path.join(dest_dir, stage, utt_name), 'wb') as out:
                pickle.dump(payload, out)
            exported.append(os.path.join(stage, utt_name))

        list_file = os.path.join(
            dest_dir, 'flist_{}_{}.json'.format(stage, suffix_id))
        with open(list_file, 'w') as out:
            json.dump(exported, out, indent=4)
def prepare_training_data(chime_data_dir, dest_dir):
    """Export mask-estimation training targets for the 'tr' and 'dt' stages.

    For every utterance returned by ``gen_flist_simu`` the '.Clean' and
    '.Noise' signals are loaded with ``get_audio_data``, transformed with
    ``stft`` (``size=256``, ``shift=128``) and handed to ``estimate_IBM``.
    The two returned masks and the magnitude of the summed spectra are
    pickled (as float32) to ``<dest_dir>/<stage>/<utterance name>``. The
    relative paths of all exported files are written to
    ``<dest_dir>/flist_<stage>.json``.

    Args:
        chime_data_dir: Forwarded unchanged to ``gen_flist_simu``.
        dest_dir: Output root; one sub-directory per stage is created if it
            does not exist yet.
    """
    for stage in ['tr', 'dt']:
        flist = gen_flist_simu(chime_data_dir, stage, ext=True)
        export_flist = []

        # The pickles below are opened inside this directory, so it has to
        # exist; without this the first open() raises FileNotFoundError on a
        # fresh dest_dir.
        os.makedirs(os.path.join(dest_dir, stage), exist_ok=True)

        for f in flist:
            clean_audio = get_audio_data(f, '.Clean')
            noise_audio = get_audio_data(f, '.Noise')

            # Swap the first two axes of each STFT result before masking.
            X = stft(clean_audio, time_dim=1, size=256,
                     shift=128).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1, size=256,
                     shift=128).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)

            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32),
            }

            # Exported files are named after the last path component.
            utt_name = f.split('/')[-1]
            export_name = os.path.join(dest_dir, stage, utt_name)
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, utt_name))

        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)
def prepare_clean_training_data(
        chime_data_dir, dest_dir,
        clean_wav_path=('/media/hipo/Mega Store/Dataset/single file/'
                        'Chinese_tai_clean.wav')):
    """Export training targets that pair CHiME noise with external speech.

    The noise of every utterance returned by ``gen_flist_simu`` is combined
    with a segment cut from one long clean recording. Segments are taken
    consecutively; when the recording is exhausted, reading restarts at its
    beginning. The read position carries over from the 'tr' stage to the
    'dt' stage. For each utterance the two masks returned by
    ``estimate_IBM`` and the magnitude of the summed spectra are pickled (as
    float32) to ``<dest_dir>/<stage>/<utterance name>``, and the relative
    paths are written to ``<dest_dir>/flist_<stage>.json``.

    Args:
        chime_data_dir: Forwarded unchanged to ``gen_flist_simu``.
        dest_dir: Output root; one sub-directory per stage is created.
        clean_wav_path: Audio file that supplies the clean speech. Defaults
            to the path that used to be hard-coded here.

    Raises:
        ValueError: If the clean recording is shorter than a single
            utterance, so that no full-length segment can be cut.
    """
    # NOTE(review): six channels are assumed to match what
    # get_audio_data(f, '.Noise') returns -- confirm against that helper.
    num_channels = 6

    # The recording does not depend on the stage, so it is read only once.
    clean_data = audioread(clean_wav_path)
    total_samples = clean_data.shape[0]
    print("clean_data size:", total_samples)

    start = 0
    for stage in ['tr', 'dt']:
        reset_counter = 0
        flist = gen_flist_simu(chime_data_dir, stage, ext=True)
        export_flist = []
        mkdir_p(os.path.join(dest_dir, stage))

        for f in tqdm.tqdm(flist,
                           desc='Generating data for {}'.format(stage)):
            noise_audio = get_audio_data(f, '.Noise')

            # The first noise channel defines the required segment length.
            seg_len = audioread(
                '{}.CH{}{}.Noise.wav'.format(f, 1, '')).shape[0]
            end = start + seg_len
            if end > total_samples:
                # Not enough samples left: wrap around to the beginning.
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = seg_len
                if end > total_samples:
                    raise ValueError(
                        'clean recording ({} samples) is shorter than '
                        'utterance {} ({} samples)'.format(
                            total_samples, f, seg_len))

            # Every channel receives the same segment and the read position
            # advances once per utterance. Advancing it inside a per-channel
            # loop leaves all channels after the first with empty slices,
            # which cannot be stacked.
            segment = clean_data[start:end]
            start = end
            clean_audio = np.repeat(
                segment[None, :], num_channels, axis=0).astype(np.float32)

            # Swap the first two axes of each STFT result before masking.
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)

            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32),
            }

            # Exported files are named after the last path component.
            utt_name = f.split('/')[-1]
            export_name = os.path.join(dest_dir, stage, utt_name)
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, utt_name))

        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)
def prepare_training_data(chime_data_dir, dest_dir):
    """Export mask-estimation training targets for the 'tr' and 'dt' stages.

    For every utterance returned by ``gen_flist_simu`` the '.Clean' and
    '.Noise' signals are loaded with ``get_audio_data``, transformed with
    ``stft`` and handed to ``estimate_IBM``. The two returned masks and the
    magnitude of the summed spectra are pickled (as float32) to
    ``<dest_dir>/<stage>/<utterance name>``. The relative paths of all
    exported files are written to ``<dest_dir>/flist_<stage>.json``.

    Args:
        chime_data_dir: Forwarded unchanged to ``gen_flist_simu``.
        dest_dir: Output root; one sub-directory per stage is created.
    """
    for stage in ('tr', 'dt'):
        utterances = gen_flist_simu(chime_data_dir, stage, ext=True)
        exported = []
        mkdir_p(os.path.join(dest_dir, stage))

        progress = tqdm.tqdm(
            utterances, desc='Generating data for {}'.format(stage))
        for utt in progress:
            speech = get_audio_data(utt, '.Clean')
            noise = get_audio_data(utt, '.Noise')

            # Swap the first two axes of each STFT result before masking.
            speech_spec = stft(speech, time_dim=1).transpose((1, 0, 2))
            noise_spec = stft(noise, time_dim=1).transpose((1, 0, 2))
            speech_mask, noise_mask = estimate_IBM(speech_spec, noise_spec)

            targets = {
                'IBM_X': speech_mask,
                'IBM_N': noise_mask,
                'Y_abs': np.abs(speech_spec + noise_spec),
            }
            payload = {
                key: value.astype(np.float32)
                for key, value in targets.items()
            }

            # Exported files are named after the last path component.
            utt_name = utt.split('/')[-1]
            with open(os.path.join(dest_dir, stage, utt_name), 'wb') as out:
                pickle.dump(payload, out)
            exported.append(os.path.join(stage, utt_name))

        list_file = os.path.join(dest_dir, 'flist_{}.json'.format(stage))
        with open(list_file, 'w') as out:
            json.dump(exported, out, indent=4)
def prepare_other_training_data(
        train_dir, dest_dir,
        noise_wav_path=('/media/hipo/lento/Dataset/single file/'
                        'noise_files/all_noise.wav')):
    """Export training targets that pair arbitrary speech with external noise.

    Every regular file in ``<train_dir minus its last character>/<stage>``
    is treated as a speech recording and combined with a segment cut from
    one long noise recording. Segments are taken consecutively; when the
    recording is exhausted, reading restarts at its beginning. The read
    position carries over from the 'tr' stage to the 'dt' stage. For each
    file the two masks returned by ``estimate_IBM`` and the magnitude of the
    summed spectra are pickled (as float32) to
    ``<dest_dir>/<stage>/<file name>``, and the relative paths are written
    to ``<dest_dir>/flist_<stage>.json``.

    Args:
        train_dir: Directory that contains the 'tr' and 'dt' folders. Its
            last character is dropped before use.
        dest_dir: Output root; one sub-directory per stage is created.
        noise_wav_path: Audio file that supplies the noise. Defaults to the
            path that used to be hard-coded here.

    Raises:
        ValueError: If the noise recording is shorter than a single speech
            file, so that no full-length segment can be cut.
    """
    # NOTE(review): this drops the last character unconditionally, which
    # presumably expects train_dir to end in a path separator -- confirm
    # with the callers before changing it.
    base_dir = train_dir[:-1]

    # The recording does not depend on the stage, so it is read only once.
    noise_data = audioread(noise_wav_path)
    total_samples = noise_data.shape[0]
    print("noise_data size:", total_samples)

    start = 0
    for stage in ['tr', 'dt']:
        # Derive the folder from the stage name; this avoids comparing
        # strings by identity ("stage is 'dt'").
        stage_dir = os.path.join(base_dir, stage)
        print(stage_dir)

        reset_counter = 0
        flist = [
            f for f in os.listdir(stage_dir)
            if os.path.isfile(os.path.join(stage_dir, f))
        ]
        export_flist = []
        mkdir_p(os.path.join(dest_dir, stage))

        for f in tqdm.tqdm(flist,
                           desc='Generating data for {}'.format(stage)):
            path = os.path.join(stage_dir, f)
            clean_audio = get_audio_single(path)

            # The raw recording defines the required segment length.
            seg_len = audioread(path).shape[0]
            end = start + seg_len
            if end > total_samples:
                # Not enough samples left: wrap around to the beginning.
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = seg_len
                if end > total_samples:
                    raise ValueError(
                        'noise recording ({} samples) is shorter than '
                        '{} ({} samples)'.format(
                            total_samples, path, seg_len))

            # A leading axis of length one is added to the segment, then the
            # read position advances.
            noise_audio = noise_data[start:end][None, :].astype(np.float32)
            start = end

            # Swap the first two axes of each STFT result before masking.
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)

            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32),
            }

            utt_name = f.split('/')[-1]
            export_name = os.path.join(dest_dir, stage, utt_name)
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, utt_name))

        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)
def get_batch(self, uttid, channel=0, divisor=16, max_length=np.inf):
    """Load the feature arrays of one utterance and derive training targets.

    NOTE(review): the nesting of the statements below was reconstructed
    from a whitespace-collapsed copy of this method; spots where more than
    one indentation level is plausible are marked with review notes.

    Args:
        uttid: Key into the ``'data'`` mapping of every entry of
            ``self.flists``.
        channel: Passed as ``idx`` to ``load_arrays_from_wav``.
        divisor: Not used by this method.
        max_length: Arrays longer than this along axis 2 are cropped to
            this length at a random offset shared by all arrays.

    Returns:
        Dict of arrays. Besides the loaded features it holds ``'frames'``
        and, depending on the instance flags and file lists, ``'ibm_x'``,
        ``'ibm_n'``, ``'irm'``, ``'senone'`` and ``'trans'``.

    Raises:
        ValueError: If a file list has a type other than 'json' or 'scp',
            or if IBM/IRM computation is requested without the signals it
            needs.
    """
    # NOTE(review): ``batch`` is never read afterwards.
    batch = {}
    if 'numpy' in self.flists:
        fname = self.flists['numpy']['data'][uttid]
        feats = load_arrays_from_numpy(self.base_dir, fname)
    else:
        feats = {}
        # NOTE(review): ``start_idx`` is never read afterwards.
        start_idx = None
        # list & dict-view yields a set, so the streams are visited in no
        # particular order.
        for name in ['clean', 'noisy', 'noise'] & self.flists.keys():
            fname = self.flists[name]['data'][uttid]
            if self.flists[name]['type'] == 'json':
                feats[name] = load_arrays_from_wav(self.base_dir, fname, idx=channel)
                # Prepend an axis of length one.
                feats[name] = np.expand_dims(feats[name], axis=0)
            elif self.flists[name]['type'] == 'scp':
                feats[name] = load_arrays_from_scp(
                    self.base_dir, fname, remove_deltas=name == 'noisy')
            else:
                raise ValueError("Type must be one of 'json', 'scp'")

    # A separately loaded noise signal is folded into 'noisy' and dropped.
    # NOTE(review): this may instead belong inside the ``else`` branch
    # above -- confirm. As placed here, 'noise' can never be present below.
    if 'noise' in feats:
        feats['noisy'] = feats['clean'] + feats['noise']
        del feats['noise']

    # Reshape features
    for name in ['clean', 'noisy'] & feats.keys():
        # Complex arrays are reduced to their magnitude.
        if feats[name].dtype == 'complex64':
            feats[name] = np.abs(feats[name])
        # NOTE(review): possibly nested under the complex64 check -- confirm.
        if self.logify:
            # The 0.01 offset keeps the logarithm finite for zeros.
            feats[name] = np.log(feats[name] + 0.01)

    if 'senone' in self.flists:
        feats['senone'] = self.flists['senone']['data'][uttid]
        feats['senone'] = np.expand_dims(feats['senone'], axis=0)

    if 'senone' in feats:
        # Labels taken from self.flists get a second leading axis here.
        feats['senone'] = np.expand_dims(feats['senone'], axis=0)
        # Pad along axis 2 by repeating the last value until the labels are
        # as long as the clean features.
        if feats['senone'].shape[2] < feats['clean'].shape[2]:
            padding = [
                (0, 0),
                (0, 0),
                (0, feats['clean'].shape[2] - feats['senone'].shape[2])
            ]
            feats['senone'] = np.pad(feats['senone'], padding, 'edge')

    # -1 marks "no crop offset drawn yet"; the first array that is too long
    # draws the offset and all later ones reuse it.
    start = -1
    for name in ['clean', 'noisy', 'noise', 'senone'] & feats.keys():
        if feats[name].shape[2] > max_length:
            if start == -1:
                start = np.random.randint(feats[name].shape[2] - max_length)
            feats[name] = feats[name][:, :, start:start + max_length]

    # NOTE(review): ``name`` is whichever key the loop above visited last;
    # the loop runs over a set, so that key is not fixed. This line may also
    # belong inside the loop body -- confirm.
    feats['frames'] = feats[name].shape[-2]
    #feats = shrink_to_min(feats, out_shape, max_length)

    if self.compute_ibm:
        if 'noise' in feats and 'clean' in feats:
            feats['ibm_x'], feats['ibm_n'] = estimate_IBM(
                feats['clean'], feats['noise'])
        elif 'noisy' in feats and 'clean' in feats:
            # Without a noise signal the noisy signal is used in its place,
            # with both extra arguments set to -15.
            feats['ibm_x'], feats['ibm_n'] = estimate_IBM(
                feats['clean'], feats['noisy'], -15, -15)
        else:
            raise ValueError(
                "To compute IBM, clean and noise or noisy signals are required"
            )

    if self.compute_irm:
        if 'noise' in feats and 'clean' in feats:
            feats['irm'] = feats['clean'] / (feats['clean'] + feats['noise'])
            #feats['irm'][feats['irm'] > 1] = 1
        elif 'noisy' in feats and 'clean' in feats:
            if np.min(feats['clean']) < 0:
                # Negative values present: shift both signals by the common
                # minimum (plus 1e-6) before taking the ratio.
                minimum = min(np.min(feats['clean']), np.min(feats['noisy']))
                feats['irm'] = (feats['clean'] - minimum + 1e-6) / (feats['noisy'] - minimum + 1e-6)
                #feats['irm'] /= 1.3
            else:
                feats['irm'] = np.sqrt(feats['clean']) / np.sqrt(
                    feats['noisy'])
                #feats['irm'] /= 2
            # Clip the mask to at most 1.
            # NOTE(review): may apply to the ``else`` branch only -- confirm.
            feats['irm'][feats['irm'] > 1] = 1
        else:
            raise ValueError(
                "To compute IRM, clean and noise or noisy signals are required"
            )

    #if 'noise' in feats:
    #    feats['noisy'] = feats['noise'] + feats['clean']

    if 'trans' in self.flists:
        # Pack the transcription as (indices, values, shape) int32 arrays
        # describing a single row; presumably a sparse-tensor triple --
        # verify against the consumer.
        indices = np.array(
            [(0, i) for i in range(len(self.flists['trans']['data'][uttid]))],
            dtype=np.int32)
        values = np.array(self.flists['trans']['data'][uttid], dtype=np.int32)
        shape = np.array((1, len(self.flists['trans']['data'][uttid])), dtype=np.int32)
        feats['trans'] = (indices, values, shape)

    return feats