def extract(self, url_list):
    """Extract features for a list of (url, label) pairs.

    Parameters
    ----------
    url_list : ``list``
        Pairs of (audio url, label).

    Returns
    -------
    ys : ``list``
        Extracted feature arrays (optionally slide-windowed).
    labels : ``list``
        Labels aligned with ``ys``.
    """
    labels = [pair[1] for pair in url_list]
    urls = [pair[0] for pair in url_list]
    self.logger.info("Start extract %d audio." % len(urls))
    n = len(urls)
    # One column per _extract_one argument, repeated for every url.
    param = zip(urls,
                [self.sample_rate] * n,
                [self.hop_length] * n,
                [self.fix_len] * n)
    ys = ops.multi_processing(self._extract_one, param, self.process_num)
    # Only apply sliding windows when a real window config is present.
    if self.slides not in ([None, None], [0, 0]):
        windows = [self.slides] * len(ys)
        ys = ops.multi_processing(slide_windows, zip(ys, windows),
                                  self.process_num)
    return ys, labels
def run(config):
    """Build class-balanced training records from the temp url file.

    Reads each url file, extracts fbank features, clips them with
    ``limit_len``, and writes the result through the balanced generator.
    """
    reader = TFrecordClassBalanceGen(config, 'train')
    for train_url in [os.path.join(config.save_path, 'url/train_tmp_0')]:
        x, y = ext_fbank_feature(train_url, config)
        x = ops.multi_processing(limit_len, x, config.n_threads, True)
        reader.write(x, y)
        # Features can be large; free them before the next file.
        del x, y
def ext_mfcc_feature(url_path, config):
    """This function is used for extract MFCC feature of a dataset.

    Parameters
    ----------
    url_path : ``str``
        The path of the 'PATH' file.
    config : ``config``
        config of feature. (To decide if we need slide_window, and params
        of slide_window; ``min_db`` is forwarded when present)

    Returns
    -------
    mfccs : ``list``
        The MFCC feature arrays.
    labels : ``list``
        The label of each utterance, each wrapped in a one-element list.
    """
    logger = logging.getLogger('data')
    labels = []
    url_list = []
    with open(url_path, 'r') as urls:
        for url in urls:
            line, label = str(url).split(" ")
            # NOTE(review): eval on file content is unsafe for untrusted
            # input; int()/ast.literal_eval would be safer if labels are
            # plain numbers — confirm label format before changing.
            index = eval(str(label).split("\n")[0])
            labels.append([index])
            url_list.append(line)
    logger.info("Extracting MFCC feature, utt_nums is %d" % len(url_list))
    n = len(url_list)
    # Build the per-utterance argument columns once; the original
    # duplicated the whole multi_processing call in both branches.
    columns = [url_list,
               [config.feature_dims] * n,
               [config.slides] * n]
    if hasattr(config, 'min_db'):
        columns.append([config.min_db] * n)
    mfccs = ops.multi_processing(calc_mfcc, zip(*columns), config.n_threads)
    logger.info("Extracting MFCC feature succeed")
    return mfccs, labels
def extract(self, url_list):
    """Extract spectrum features for paired audio sources.

    Parameters
    ----------
    url_list : ``list``
        Audio urls; paired into source groups by ``self.url_pair``.

    Returns
    -------
    data, label
        The 'data' and 'label' entries of the extraction result.
    """
    paired_data = self.url_pair(url_list, self.source_num)
    self.logger.info("Start extract %d audio." % len(url_list))
    n = len(url_list)
    param = zip(paired_data,
                [self.sample_rate] * n,
                [self.n_fft] * n,
                [self.dims] * n,
                # BUG FIX: original wrote range(url_list) — missing len() —
                # which raises TypeError as soon as the zip is consumed.
                [self.config.hop_length] * n,
                [self.fix_len] * n)
    spec_dic = ops.multi_processing(self._extract_one, param, self.process_num)
    return spec_dic['data'], spec_dic['label']
def ext_fbank_feature(url_path, config):
    """This function is used for extract features of one dataset.

    Parameters
    ----------
    url_path : ``str``
        The path of the 'PATH' file.
    config : ``config``
        config of feature. (Contain the parameters of slide_window and
        feature_dims)

    Returns
    -------
    fbank : ``list``
        The feature array. each frame concat with the frame before and
        after it.
    label : ``list``
        The label of fbank feature.

    Notes
    -----
    Changeable concat size is in the todolist
    """
    logger = logging.getLogger('data')
    url_list, labels = [], []
    with open(url_path, 'r') as urls:
        for line in urls:
            url, label = str(line).split(" ")
            url_list.append(url)
            labels.append([eval(str(label).split("\n")[0])])
    logger.info("Extracting fbank feature, utt_nums is %d" % len(url_list))
    n = len(url_list)
    # Padding target: a shared max length when fixing, otherwise None.
    if config.fix_len is not None:
        max_lens = [get_max_audio_time(url_list)] * n
    else:
        max_lens = [None] * n
    n_filt = [config.feature_dims] * n
    if config.slides is not None:
        slide_l = [config.slides[0]] * n
        slide_r = [config.slides[1]] * n
    else:
        slide_l = [None] * n
        slide_r = [None] * n
    fbanks = ops.multi_processing(
        calc_fbank, zip(url_list, n_filt, slide_l, slide_r, max_lens),
        config.n_threads)
    logger.info("Extracting fbank feature succeed")
    return fbanks, labels
def ext_fbank_feature(url_path, config):
    """This function is used for extract features of one dataset.

    Parameters
    ----------
    url_path : ``str``
        The path of the 'PATH' file.
    config : ``config``
        config of feature. (To decide if we need slide_window, and params
        of slide_window)

    Returns
    -------
    fbank : ``list``
        The feature array. each frame concat with the frame before and
        after it.
    label : ``list``
        The label of fbank feature.

    Notes
    -----
    Changeable concat size is in the todolist
    """
    logger = logging.getLogger(config.model_name)
    url_list, labels = [], []
    with open(url_path, 'r') as urls:
        for line in urls:
            url, label = str(line).split(" ")
            url_list.append(url)
            labels.append([eval(str(label).split("\n")[0])])
    logger.info("Extracting fbank feature, utt_nums is %d" % len(url_list))
    n = len(url_list)
    fbanks = ops.multi_processing(
        calc_fbank,
        zip(url_list, [config.feature_dims] * n, [config.slides] * n),
        config.n_threads)
    logger.info("Extracting fbank feature succeed")
    return fbanks, labels
def ext_spec_feature(url_path, config):
    """Extract STFT spectrum features (magnitude and phase) of a dataset.

    Parameters
    ----------
    url_path : ``str``
        The path of the 'PATH' file.
    config : ``config``
        Feature config providing ``NFFT`` and ``frame_size``.

    Returns
    -------
    mag_spec, phase_spec
        Magnitude and phase spectra for each utterance.
    """
    logger = logging.getLogger('data')
    labels, url_list = [], []
    with open(url_path, 'r') as urls:
        for url in urls:
            line, label = str(url).split(" ")
            labels.append([eval(str(label).split("\n")[0])])
            url_list.append(line)
    logger.info("Extracting Spec feature, utt_nums is %d" % len(url_list))
    n = len(url_list)
    nffts = [config.NFFT] * n
    frame_sizes = [config.frame_size] * n
    # NOTE(review): this unpacks the multi_processing result into exactly
    # two values — assumes it returns a (mag, phase) pair rather than a
    # per-utterance list; confirm against ops.multi_processing.
    (mag_spec, phase_spec) = ops.multi_processing(
        get_stft, zip(url_list, nffts, frame_sizes), config.n_threads)
    logger.info("Extracting Spec feature succeed")
    return mag_spec, phase_spec
if __name__ == '__main__':
    config = TrainConfig('../config.json')
    config.save_path = '.'
    train_urls = [
        '/home/data/speaker-recognition/url/train_1',
        '/home/data/speaker-recognition/url/train_2',
        '/home/data/speaker-recognition/url/train_3'
    ]
    enroll_url = '/home/data/speaker-recognition/url/enroll'
    test_url = '/home/data/speaker-recognition/url/test'

    # The original repeated the same extract/limit/write sequence three
    # times; drive all record files through one loop instead.
    jobs = [
        ('Train.record', train_urls),
        ('Enroll.record', [enroll_url]),
        ('Test.record', [test_url]),
    ]
    for record_name, urls in jobs:
        gen = TFrecordGen(config, record_name)
        for url in urls:
            x, y = ext_fbank_feature(url, config)
            x = ops.multi_processing(limit_len, x, config.n_threads, True)
            gen.write(x, y)
            # Features can be large; free them before the next file.
            del x, y

    # Restored: the logger was created but its only use was commented out.
    logger = logging.getLogger(config.model_name + '_train')
    logger.info("Feature proccessing done.")