def _populate_batch_queue(self, session, coord):
    ''' Queue thread routine. '''
    num_files = len(self._data_set.files)
    idx = -1
    while not coord.should_stop():
        idx = self._data_set.next_index(idx) % num_files
        audio_path, text = self._data_set.files[idx]
        features = audiofile_to_input_vector(audio_path,
                                             self._model_feeder.numcep,
                                             self._model_feeder.numcontext)
        num_frames = len(features)
        # TODO(review): original (Korean) comment asks to change this call so
        # the targets come back in diphone form.
        labels = text_to_char_array(text, self._alphabet)
        num_labels = len(labels)
        # CTC requires at least as many time steps as output symbols.
        if num_frames < num_labels:
            raise ValueError('Error: Audio file {} is too short for transcription.'.format(audio_path))
        try:
            session.run(self._enqueue_op, feed_dict={
                self._model_feeder.ph_x: features,
                self._model_feeder.ph_x_length: num_frames,
                self._model_feeder.ph_y: labels,
                self._model_feeder.ph_y_length: num_labels
            })
        except tf.errors.CancelledError:
            # Queue was closed during shutdown - end this thread quietly.
            return
def _populate_batch_queue(self, session, coord):
    ''' Queue thread routine. '''
    num_files = len(self._data_set.files)
    idx = -1
    while not coord.should_stop():
        idx = self._data_set.next_index(idx) % num_files
        audio_path, text = self._data_set.files[idx]
        features = audiofile_to_input_vector(audio_path,
                                             self._model_feeder.numcep,
                                             self._model_feeder.numcontext)
        labels = text_to_char_array(text)
        try:
            session.run(self._enqueue_op, feed_dict={
                self._model_feeder.ph_x: features,
                self._model_feeder.ph_x_length: len(features),
                self._model_feeder.ph_y: labels,
                self._model_feeder.ph_y_length: len(labels)
            })
        except tf.errors.CancelledError:
            # Queue closed during shutdown - exit the thread quietly.
            return
def process_single_file(row):
    """Turn one DataFrame row into (features, n_frames, transcript, n_chars)."""
    # row is an (index, Series) pair from DataFrame.iterrows(); drop the index.
    file = row[1]
    feats = audiofile_to_input_vector(file.wav_filename, N_FEATURES, N_CONTEXT)
    labels = text_to_char_array(file.transcript, alphabet)
    return feats, len(feats), labels, len(labels)
def generate_values():
    """Yield (sample_id, audio, sample rate field, sparse transcript) tuples."""
    # Default the decode look-ahead to two batches when not given explicitly.
    lookahead = 2 * batch_size if process_ahead is None else process_ahead
    sample_stream = samples_from_files(sources, buffering=buffering)
    for sample in change_audio_types(sample_stream, AUDIO_TYPE_NP,
                                     process_ahead=lookahead):
        encoded = text_to_char_array(sample.transcript, Config.alphabet,
                                     context=sample.sample_id)
        yield sample.sample_id, sample.audio, sample.audio_format[0], to_sparse_tuple(encoded)
def process_single_file(row, numcep, numcontext, alphabet):
    """Compute audio features and the encoded transcript for one CSV row.

    Raises ValueError when the clip has fewer usable frames than the
    transcript has characters (unusable for CTC alignment).
    """
    # row is an (index, Series) pair from DataFrame.iterrows().
    _, file = row
    feats = audiofile_to_input_vector(file.wav_filename, numcep, numcontext)
    # The context windows on both ends do not yield full feature vectors.
    usable = len(feats) - 2 * numcontext
    labels = text_to_char_array(file.transcript, alphabet)
    if usable < len(labels):
        raise ValueError('Error: Audio file {} is too short for transcription.'.format(file.wav_filename))
    return feats, usable, labels, len(labels)
def _compute_source_target(self):
    ''' Load the first sample's audio features and its character targets. '''
    transcript_path = self._txt_files[0]
    audio_path = path.splitext(transcript_path)[0] + ".wav"
    features = audiofile_to_input_vector(audio_path, self._numcep, self._numcontext)
    with open(transcript_path) as fin:
        raw = fin.read()
    # Drop the first two whitespace-separated tokens, lower-case the rest,
    # and remove periods before encoding to the character alphabet.
    cleaned = ' '.join(raw.strip().lower().split(' ')[2:]).replace('.', '')
    labels = text_to_char_array(cleaned)
    return features, len(features), labels, len(labels)
def _populate_batch_queue(self, session):
    ''' Queue thread routine: feed (audio features, transcript) pairs into
    the input queue until the queue is closed.

    Fixes:
    - str.encode() returns bytes on Python 3, but text_to_char_array expects
      a string, so decode back to an ASCII str after stripping non-ASCII
      characters (same encode/decode dance as the other feeder variants).
    - Catch tf.errors.CancelledError so the thread exits cleanly when the
      queue is closed during shutdown, consistent with the sibling feeders.
    '''
    for txt_file, wav_file in self._files_circular_list:
        source = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
        source_len = len(source)
        with codecs.open(txt_file, encoding="utf-8") as open_txt_file:
            # Normalize, drop non-ASCII, then decode so downstream gets str.
            target = unicodedata.normalize("NFKD", open_txt_file.read()) \
                                .encode("ascii", "ignore") \
                                .decode("ascii", "ignore")
        target = text_to_char_array(target)
        target_len = len(target)
        try:
            session.run(self._enqueue_op, feed_dict={
                self._x: source,
                self._x_length: source_len,
                self._y: target,
                self._y_length: target_len})
        except tf.errors.CancelledError:
            # Queue closed - end this feeder thread.
            return
def _populate_batch_queue(self, session):
    ''' Push every (wav, transcript) pair from the index into the queue. '''
    for audio_path, text in self._indices():
        features = audiofile_to_input_vector(audio_path, self._numcep, self._numcontext)
        labels = text_to_char_array(text)
        try:
            session.run(self._enqueue_op, feed_dict={
                self._x: features,
                self._x_length: len(features),
                self._y: labels,
                self._y_length: len(labels)
            })
        except tf.errors.CancelledError:
            # Queue closed during shutdown - exit quietly.
            return
def _populate_batch_queue(self, session):
    ''' Enqueue (features, transcript) pairs until the coordinator stops. '''
    for transcript_path, audio_path in self._files_circular_list:
        # Bail out early if training is shutting down.
        if self._coord.should_stop():
            return
        features = audiofile_to_input_vector(audio_path, self._numcep, self._numcontext)
        with codecs.open(transcript_path, encoding="utf-8") as fin:
            normalized = unicodedata.normalize("NFKD", fin.read())
        labels = text_to_char_array(normalized)
        try:
            session.run(self._enqueue_op, feed_dict={
                self._x: features,
                self._x_length: len(features),
                self._y: labels,
                self._y_length: len(labels)
            })
        except tf.errors.CancelledError:
            # Queue closed - end this feeder thread.
            return
def _populate_batch_queue(self, session):
    ''' Feed the input queue from the (txt, wav) pairs in the index. '''
    for transcript_path, audio_path in self._indices():
        features = audiofile_to_input_vector(audio_path, self._numcep, self._numcontext)
        num_frames = len(features)
        with codecs.open(transcript_path, encoding="utf-8") as fin:
            normalized = unicodedata.normalize("NFKD", fin.read())
        # encode() gives a bytes() object on Python 3 while text_to_char_array
        # expects a string, hence the encode/decode round trip that also
        # strips any non-ASCII characters.
        cleaned = normalized.encode("ascii", "ignore").decode("ascii", "ignore")
        labels = text_to_char_array(cleaned)
        num_labels = len(labels)
        try:
            session.run(self._enqueue_op, feed_dict={
                self._x: features,
                self._x_length: num_frames,
                self._y: labels,
                self._y_length: num_labels})
        except tf.errors.CancelledError:
            # Queue closed during shutdown - exit quietly.
            return
def _populate_batch_queue(self, session, coord):
    ''' Queue thread routine. '''
    total = len(self._data_set.files)
    pos = -1
    while not coord.should_stop():
        pos = self._data_set.next_index(pos) % total
        audio_path, text = self._data_set.files[pos]
        features = audiofile_to_input_vector(audio_path,
                                             self._model_feeder.numcep,
                                             self._model_feeder.numcontext)
        labels = text_to_char_array(text, self._alphabet)
        # CTC needs at least as many time steps as output symbols.
        if len(features) < len(labels):
            raise ValueError('Error: Audio file {} is too short for transcription.'.format(audio_path))
        try:
            session.run(self._enqueue_op, feed_dict={
                self._model_feeder.ph_x: features,
                self._model_feeder.ph_x_length: len(features),
                self._model_feeder.ph_y: labels,
                self._model_feeder.ph_y_length: len(labels)
            })
        except tf.errors.CancelledError:
            # Queue closed during shutdown - exit the thread quietly.
            return
# NOTE(review): fragment - the enclosing text-cleaning function's header and
# the start of this .replace() chain lie outside the visible span, so the
# chain below is left byte-identical.  str-literal .decode() calls indicate
# this is Python 2 source.
    .replace('á'.decode("utf-8"), 'a') \
    .replace('é'.decode("utf-8"), 'e') \
    .replace('í'.decode("utf-8"), 'i') \
    .replace('ó'.decode("utf-8"), 'o') \
    .replace('ú'.decode("utf-8"), 'u') \
    .replace('ö'.decode("utf-8"), 'o') \
    .replace('ü'.decode("utf-8"), 'u') \
    .replace('ý'.decode("utf-8"), 'y') \
    .replace('–'.decode("utf-8"), ' ') \
    .replace('‼'.decode("utf-8"), ' ') \
    .replace("mp3", "eme pe hiru")
    # Collapse punctuation to spaces, then normalise whitespace and case.
    text = punct.sub(' ', text)
    text = " " + text + " "
    text = text.strip().lower()
    return text


# Walk the dataset tree and check that every cleaned transcript survives
# text_to_char_array; any failure is printed (with its file path) and skipped.
for root, dirnames, filenames in os.walk("/home/ubuntu/datasets/tempo/"):
    for filename in fnmatch.filter(filenames, "*.txt"):
        trans_file = os.path.join(root, filename)
        try:
            text.text_to_char_array(
                clean_non_ascii(codecs.open(trans_file, 'r', 'utf-8').read()),
                alphabet)
        except Exception as err:
            print(err, )
            print(trans_file)
            #import pdb; pdb.set_trace()
            continue