def next_batch(bs=batch_size, train=True):
    """Assemble a batch of `bs` utterances as CTC-ready model inputs.

    The cached utterances are sorted by key and split at a fixed index:
    entries past `test_index` form the training pool, the first
    `test_index` entries form the held-out pool.  The first `bs`
    utterances of the selected pool are converted to feature matrices,
    zero-padded to a common time length, and stacked.

    Args:
        bs: number of utterances in the batch.
        train: if True draw from the training pool, else the held-out pool.

    Returns:
        (x_batch, y_batch, seq_len_batch, original_batch) where x_batch is
        a single stacked feature array, y_batch is a sparse tuple of the
        targets, seq_len_batch is a 1-D array of per-utterance sequence
        lengths, and original_batch holds the original transcriptions.
    """
    # The utterance ordering is invariant across batch elements; the
    # original rebuilt and re-sorted the whole cache index on every loop
    # iteration.  Compute it once, up front.
    ut_length_dict = {key: len(val['target']) for key, val in audio.cache.items()}
    utterances = sorted(ut_length_dict.items(), key=operator.itemgetter(0))
    test_index = 346  # first 346 sorted utterances are reserved for evaluation
    if train:
        utterances = [a[0] for a in utterances[test_index:]]
    else:
        utterances = [a[0] for a in utterances[:test_index]]

    x_batch = []
    y_batch = []
    seq_len_batch = []
    original_batch = []
    for i in range(bs):
        training_element = audio.cache[utterances[i]]
        target_text = training_element['target']
        audio_buffer = training_element['audio']
        # BUG FIX: the original passed the placeholder string 'whatever' as
        # the transcription, so every CTC target decoded to the literal text
        # "whatever".  Pass the utterance's real target text, matching the
        # sibling next_batch implementation in this file.
        x, y, seq_len, original = convert_inputs_to_ctc_format(
            audio_buffer, sample_rate, target_text, num_features)
        x_batch.append(x)
        y_batch.append(y)
        seq_len_batch.append(seq_len)
        original_batch.append(original)

    # Sparse representation of the targets for the CTC loss placeholder.
    y_batch = sparse_tuple_from(y_batch)
    seq_len_batch = np.array(seq_len_batch)[:, 0]
    # Zero-pad each feature matrix along the time axis up to the longest
    # sequence in the batch so the matrices can be stacked into one array.
    for i, pad in enumerate(np.max(seq_len_batch) - seq_len_batch):
        x_batch[i] = np.pad(x_batch[i], ((0, 0), (0, pad), (0, 0)),
                            mode='constant', constant_values=0)
    x_batch = np.concatenate(x_batch, axis=0)
    return x_batch, y_batch, seq_len_batch, original_batch
def next_batch(bs=batch_size, train=False):
    """Assemble the first `bs` cached utterances as CTC-ready inputs.

    NOTE(review): this re-defines `next_batch` and therefore shadows the
    earlier definition in this module at import time — confirm which one
    is intended to survive.

    Args:
        bs: number of utterances in the batch.
        train: unused; kept for signature compatibility with the earlier
            `next_batch` definition.

    Returns:
        (x_batch, y_batch, seq_len_batch, original_batch) — stacked,
        time-padded feature array; sparse target tuple; 1-D sequence-length
        array; original transcriptions.
    """
    # The original built a {key: target_length} dict only to sort by key
    # (itemgetter(0)) and immediately discard the lengths; sorting the keys
    # directly is equivalent and skips the wasted length computation.
    utterances = sorted(audio.cache.keys())

    x_batch = []
    y_batch = []
    seq_len_batch = []
    original_batch = []
    for k in range(bs):
        training_element = audio.cache[utterances[k]]
        target_text = training_element['target']
        audio_buffer = training_element['audio']
        x, y, seq_len, original = convert_inputs_to_ctc_format(
            audio_buffer, sample_rate, target_text, num_features)
        x_batch.append(x)
        y_batch.append(y)
        seq_len_batch.append(seq_len)
        original_batch.append(original)

    # Sparse representation of the targets for the CTC loss placeholder.
    y_batch = sparse_tuple_from(y_batch)
    seq_len_batch = np.array(seq_len_batch)[:, 0]
    # Zero-pad each feature matrix along the time axis up to the longest
    # sequence in the batch so the matrices can be stacked into one array.
    for i, pad in enumerate(np.max(seq_len_batch) - seq_len_batch):
        x_batch[i] = np.pad(x_batch[i], ((0, 0), (0, pad), (0, 0)),
                            mode='constant', constant_values=0)
    x_batch = np.concatenate(x_batch, axis=0)
    return x_batch, y_batch, seq_len_batch, original_batch
def next_testing_file(file_name):
    """Read one audio file and convert it to CTC model inputs.

    Args:
        file_name: path of the audio file to load.

    Returns:
        (test_inputs, test_seq_len, original) — the feature matrix, its
        sequence length, and the original transcription.  The empty string
        is passed as the target text; the resulting targets are discarded.
    """
    from audio_reader import read_audio_from_filename
    from utils import convert_inputs_to_ctc_format

    signal = read_audio_from_filename(file_name, sample_rate=c.AUDIO.SAMPLE_RATE)
    inputs, _targets, seq_len, original = convert_inputs_to_ctc_format(
        signal, c.AUDIO.SAMPLE_RATE, "")
    return inputs, seq_len, original
def next_training_batch():
    """Pick one of the first five cached utterances at random and convert it.

    Returns:
        (train_inputs, train_targets, train_seq_len, original) as produced
        by convert_inputs_to_ctc_format for the chosen utterance.
    """
    import random
    from utils import convert_inputs_to_ctc_format

    # Sample only from the first five cache entries.
    chosen_key = random.choice(list(audio.cache.keys())[:5])
    element = audio.cache[chosen_key]
    inputs, targets, seq_len, original = convert_inputs_to_ctc_format(
        element['audio'], c.AUDIO.SAMPLE_RATE, element['target'])
    return inputs, targets, seq_len, original
def next_testing_batch():
    """Convert a random early-cache utterance with a random prefix removed.

    A random number of leading samples (1..999) is dropped from the audio
    before conversion, simulating a time-shifted input.

    Returns:
        (train_inputs, train_targets, train_seq_len, original, random_shift)
        where random_shift is the number of samples that were dropped.
    """
    import random
    from utils import convert_inputs_to_ctc_format

    # Sample only from the first five cache entries.
    chosen_key = random.choice(list(audio.cache.keys())[:5])
    element = audio.cache[chosen_key]
    target_text = element['target']
    shift = np.random.randint(low=1, high=1000)
    print('random_shift =', shift)
    clipped_audio = element['audio'][shift:]
    inputs, targets, seq_len, original = convert_inputs_to_ctc_format(
        clipped_audio, c.AUDIO.SAMPLE_RATE, target_text)
    return inputs, targets, seq_len, original, shift