def single_crm(idx_str_list, mix_path, data_path):
    """Compute and store one cRM per listed index against a saved mix.

    The mix array is loaded from ``mix_path``. For every ``idx`` in
    ``idx_str_list`` the array ``<data_path>/single/single-<idx>.npy`` is
    loaded, passed with the mix to ``utils.fast_cRM``, and the result is
    saved as ``<data_path>/crm/crm-<all ids>-<idx>.npy``. Each saved file
    name is appended to ``<data_path>/crm_log.txt``, and one summary line
    (mix file name followed by all cRM file names, space separated) is
    appended to ``<data_path>/dataset.txt``.

    :param idx_str_list: indices, formatted into file names with ``%s``;
        iterated twice, so it must be re-iterable (e.g. a list)
    :param mix_path: path of the ``.npy`` file holding the mix array
        (presumably its STFT, judging by the ``F_`` naming -- confirm)
    :param data_path: root directory containing ``single/`` and ``crm/``
    :return: None
    """
    # NOTE(review): a second ``single_crm`` (third parameter named
    # ``database_repo``) is defined later in this file and would shadow this
    # one at import time -- confirm which definition is intended.
    mix_spec = np.load(mix_path)

    # "-<id1>-<id2>..." is shared by the mix file name and every cRM name.
    id_suffix = ''.join('-%s' % idx for idx in idx_str_list)
    fields = ['mix' + id_suffix + '.npy']

    for idx in idx_str_list:
        single_file = 'single-%s.npy' % idx
        single_spec = np.load('%s/single/%s' % (data_path, single_file))
        mask = utils.fast_cRM(single_spec, mix_spec)

        mask_file = 'crm' + id_suffix + ('-%s' % idx) + '.npy'
        np.save('%s/crm/%s' % (data_path, mask_file), mask)

        # The log is appended right after each save, one file name per line.
        with open('%s/crm_log.txt' % data_path, 'a') as log:
            log.write(mask_file)
            log.write('\n')

        fields.append(mask_file)

    with open('%s/dataset.txt' % data_path, 'a') as index:
        index.write(' '.join(fields))
        index.write('\n')
def single_crm(idx_str_list, mix_path, database_repo):
    """Build the cRM of every single-speaker array in a mix and record it.

    Loads the mix array from ``mix_path``; then, for each ``idx`` in
    ``idx_str_list``, loads ``<database_repo>/single/single-<idx>.npy``,
    calls ``utils.fast_cRM(single, mix)`` and saves the result under
    ``<database_repo>/crm/``. File names are logged to ``crm_log.txt`` and a
    combined line is appended to ``dataset.txt`` in ``database_repo``.

    :param idx_str_list: indices, formatted into file names with ``%s``;
        iterated twice, so it must be re-iterable (e.g. a list)
    :param mix_path: path of the ``.npy`` file holding the mix array
    :param database_repo: root directory containing ``single/`` and ``crm/``
    :return: None
    """

    def append_line(file_path, text):
        # Append ``text`` plus a newline to the given text file.
        with open(file_path, 'a') as f:
            f.write(text)
            f.write('\n')

    F_mix = np.load(mix_path)

    # Every id contributes a "-<id>" part to the shared middle of the names.
    joined_ids = ""
    for idx in idx_str_list:
        joined_ids = joined_ids + ("-%s" % idx)

    # dataset.txt line: "<mix file> <crm file> <crm file> ..."
    dataset_line = "mix" + joined_ids + '.npy'

    for idx in idx_str_list:
        single_name = 'single-%s.npy' % idx
        F_single = np.load('%s/single/%s' % (database_repo, single_name))

        cRM_name = 'crm' + joined_ids + ('-%s' % idx) + '.npy'

        # save crm to npy
        np.save(
            '%s/crm/%s' % (database_repo, cRM_name),
            utils.fast_cRM(F_single, F_mix),
        )

        # save crm information to log
        append_line('%s/crm_log.txt' % database_repo, cRM_name)

        dataset_line = dataset_line + " " + cRM_name

    # write in database log
    append_line('%s/dataset.txt' % database_repo, dataset_line)
def generate_mix_sample(audio_path_list, num_speaker, fix_sr=16000, verbose=0,
                        database_repo='audio_database'):
    '''
    Generate a mix sample from the audios in the list and store it on disk.

    The first ``num_speaker`` entries of ``audio_path_list`` are loaded with
    ``librosa.load``, averaged into one mix signal, transformed with
    ``utils.fast_stft`` and turned into one cRM per speaker with
    ``utils.fast_cRM``. The mix STFT is saved to
    ``<database_repo>/mix/mix-<ids>.npy``, each cRM to
    ``<database_repo>/crm/crm-<ids>-<id>.npy``, and one line listing these
    file names is appended to ``<database_repo>/dataset.txt``.

    :param audio_path_list: list of ``(idx, path)`` pairs; ``idx`` is
        formatted with ``%05d`` and ``path`` is the wav audio file
    :param num_speaker: number of leading entries to mix (specifies the
        speech separation task); must be >= 1 and no larger than the list
    :param fix_sr: sample rate passed to ``librosa.load``
    :param verbose: when equal to 1, print the shapes of the mix STFT and cRMs
    :param database_repo: root directory holding ``mix/``, ``crm/`` and
        ``dataset.txt``; defaults to the previously hard-coded
        ``'audio_database'``
    :return: None (results are written to disk only)
    '''
    # Original author's note: shape of F_mix = (298,257,2), shape of crm =
    # (298,257,2) -- this depends on clip length and utils.fast_stft; confirm.

    # Index explicitly (instead of slicing) so a too-short list still raises
    # IndexError rather than silently mixing fewer speakers.
    speakers = [audio_path_list[i] for i in range(num_speaker)]
    speaker_ids = [idx for idx, _ in speakers]
    post_name = ''.join('-%05d' % idx for idx in speaker_ids)

    # import data
    data_list = [librosa.load(path, sr=fix_sr)[0] for _, path in speakers]

    # create mix audio according to mix rate (equal weight per speaker)
    # NOTE(review): assumes every clip has the same number of samples as the
    # first one; otherwise the in-place addition below fails -- confirm inputs.
    mix_rate = 1.0 / float(num_speaker)
    mix = np.zeros(shape=data_list[0].shape)
    for data in data_list:
        mix += data * mix_rate

    # transform data via STFT
    F_list = [utils.fast_stft(data, power=False) for data in data_list]
    F_mix = utils.fast_stft(mix, power=False)

    # create cRM for each speaker
    cRM_list = [utils.fast_cRM(F, F_mix) for F in F_list]

    if verbose == 1:
        print('shape of X: ', F_mix.shape)
        for i, cRM in enumerate(cRM_list):
            print('shape of cRM%s :' % i, cRM.shape)

    # File names (without extension), computed once for both saving and logging.
    mix_name = 'mix' + post_name
    crm_prefix = 'crm' + post_name
    crm_names = [crm_prefix + ('-%05d' % idx) for idx in speaker_ids]

    # Save the arrays BEFORE touching dataset.txt, so the index never lists
    # files that failed to be written.
    np.save('%s/mix/%s.npy' % (database_repo, mix_name), F_mix)
    for name, cRM in zip(crm_names, cRM_list):
        np.save('%s/crm/%s.npy' % (database_repo, name), cRM)

    # Append "<mix>.npy <crm>.npy <crm>.npy ..." as a single line.
    record = ' '.join([mix_name + '.npy'] + [name + '.npy' for name in crm_names])
    with open('%s/dataset.txt' % database_repo, 'a') as f:
        f.write(record + '\n')