Пример #1
0
def next_batch(bs=batch_size, train=True):
    """Build one batch of size *bs* in CTC format.

    Utterances are taken from ``audio.cache``, sorted by key; entries from
    index 346 onward form the training split, earlier ones the test split.

    :param bs: number of utterances in the batch.
    :param train: select the training split when True, else the test split.
    :return: tuple ``(x_batch, y_batch, seq_len_batch, original_batch)`` where
        ``x_batch`` is a padded/concatenated feature array, ``y_batch`` a
        sparse tuple of targets, ``seq_len_batch`` the per-utterance sequence
        lengths, and ``original_batch`` the original transcripts.
    """
    # The key index is loop-invariant: build and sort it once, not per item.
    ut_length_dict = {k: len(v['target']) for k, v in audio.cache.items()}
    utterances = sorted(ut_length_dict.items(), key=operator.itemgetter(0))
    test_index = 346  # keys before this index form the held-out split
    if train:
        keys = [a[0] for a in utterances[test_index:]]
    else:
        keys = [a[0] for a in utterances[:test_index]]

    x_batch = []
    y_batch = []
    seq_len_batch = []
    original_batch = []
    for i in range(bs):
        training_element = audio.cache[keys[i]]
        target_text = training_element['target']
        audio_buffer = training_element['audio']
        # BUG FIX: pass the actual transcript instead of the placeholder
        # string 'whatever' (target_text was computed but never used; the
        # sibling implementation of next_batch passes target_text here).
        x, y, seq_len, original = convert_inputs_to_ctc_format(
            audio_buffer, sample_rate, target_text, num_features)
        x_batch.append(x)
        y_batch.append(y)
        seq_len_batch.append(seq_len)
        original_batch.append(original)

    # Sparse representation for the targets placeholder.
    y_batch = sparse_tuple_from(y_batch)
    seq_len_batch = np.array(seq_len_batch)[:, 0]
    # Right-pad each feature array along the time axis up to the batch maximum.
    for i, pad in enumerate(np.max(seq_len_batch) - seq_len_batch):
        x_batch[i] = np.pad(x_batch[i], ((0, 0), (0, pad), (0, 0)),
                            mode='constant',
                            constant_values=0)

    x_batch = np.concatenate(x_batch, axis=0)
    return x_batch, y_batch, seq_len_batch, original_batch
Пример #2
0
def next_batch(bs=batch_size, train=False):
    """Build one batch of size *bs* in CTC format from ``audio.cache``.

    Takes the first *bs* utterances in key-sorted order (no train/test split
    is applied here; ``train`` is accepted for interface compatibility but
    unused).

    :param bs: number of utterances in the batch.
    :param train: unused; kept for signature compatibility with callers.
    :return: tuple ``(x_batch, y_batch, seq_len_batch, original_batch)`` where
        ``x_batch`` is a padded/concatenated feature array, ``y_batch`` a
        sparse tuple of targets, ``seq_len_batch`` the per-utterance sequence
        lengths, and ``original_batch`` the original transcripts.
    """
    x_batch = []
    y_batch = []
    seq_len_batch = []
    original_batch = []
    # Sort keys so batch composition is deterministic across runs.
    ut_length_dict = {k: len(v['target']) for k, v in audio.cache.items()}
    utterances = [a[0] for a in
                  sorted(ut_length_dict.items(), key=operator.itemgetter(0))]
    for k in range(bs):
        training_element = audio.cache[utterances[k]]
        target_text = training_element['target']
        audio_buffer = training_element['audio']

        x, y, seq_len, original = convert_inputs_to_ctc_format(
            audio_buffer, sample_rate, target_text, num_features)
        x_batch.append(x)
        y_batch.append(y)
        seq_len_batch.append(seq_len)
        original_batch.append(original)

    # Sparse representation for the targets placeholder.
    y_batch = sparse_tuple_from(y_batch)
    seq_len_batch = np.array(seq_len_batch)[:, 0]
    # Right-pad each feature array along the time axis up to the batch maximum.
    for i, pad in enumerate(np.max(seq_len_batch) - seq_len_batch):
        x_batch[i] = np.pad(x_batch[i], ((0, 0), (0, pad), (0, 0)),
                            mode='constant',
                            constant_values=0)

    x_batch = np.concatenate(x_batch, axis=0)
    return x_batch, y_batch, seq_len_batch, original_batch
Пример #3
0
 def next_testing_file(file_name):
     """Read an audio file and convert it to CTC-format network inputs.

     :param file_name: path of the audio file to load.
     :return: tuple ``(test_inputs, test_seq_len, original)`` — the target
         component of the conversion is discarded (empty transcript is passed).
     """
     from audio_reader import read_audio_from_filename
     from utils import convert_inputs_to_ctc_format
     audio_data = read_audio_from_filename(
         file_name, sample_rate=c.AUDIO.SAMPLE_RATE)
     test_inputs, _, test_seq_len, original = convert_inputs_to_ctc_format(
         audio_data, c.AUDIO.SAMPLE_RATE, "")
     return test_inputs, test_seq_len, original
Пример #4
0
 def next_training_batch():
     """Pick one of the first five cached utterances at random and return it
     in CTC format as ``(inputs, targets, seq_len, original)``.
     """
     import random
     from utils import convert_inputs_to_ctc_format
     chosen_key = random.choice(list(audio.cache.keys())[:5])
     element = audio.cache[chosen_key]
     transcript = element['target']
     train_inputs, train_targets, train_seq_len, original = \
         convert_inputs_to_ctc_format(
             element['audio'], c.AUDIO.SAMPLE_RATE, transcript)
     return train_inputs, train_targets, train_seq_len, original
 def next_testing_batch():
     """Pick one of the first five cached utterances at random, drop a random
     number (1..999) of leading audio samples, and return the shifted element
     in CTC format plus the shift that was applied.
     """
     import random
     from utils import convert_inputs_to_ctc_format
     chosen_key = random.choice(list(audio.cache.keys())[:5])
     element = audio.cache[chosen_key]
     transcript = element['target']
     random_shift = np.random.randint(low=1, high=1000)
     print('random_shift =', random_shift)
     shifted_audio = element['audio'][random_shift:]
     out = convert_inputs_to_ctc_format(
         shifted_audio, c.AUDIO.SAMPLE_RATE, transcript)
     train_inputs, train_targets, train_seq_len, original = out
     return train_inputs, train_targets, train_seq_len, original, random_shift