Example 1
 def __init__(self, batch_items, batch_size, lang):
     """
     Initialize BatchGenerator
     :param batch_items: sequence of anything. This can be a list, a DataFrame, ...
     :param batch_size: number of elements in each batch
     :param lang: language to use. This will affect the tokens used for encoding the labels
     """
     self.batch_items = batch_items
     self.batch_size = batch_size
     self.cur_index = 0
     self.tokens = get_tokens(lang)
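A minimal usage sketch of the constructor above; the concrete batch items, the batch size and the import location of BatchGenerator are assumptions, only the signature is taken from the example:

# Hypothetical usage; importing BatchGenerator from the right module is left out.
transcripts = ['foo bar', 'baz qux']            # any sequence works per the docstring
gen = BatchGenerator(batch_items=transcripts, batch_size=16, lang='en')
print(gen.batch_size, len(gen.tokens))          # 16 and the size of the English token set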
Example 2
 def __init__(self, model, language, greedy):
     """
     Initialize the decoder
     :param model: The trained Keras model that made the inferences
     :param language: language to use for decoding. This will affect the alphabet used for decoding
     :param greedy: whether a best-path (True) or a beam search approach (False) shall be used
     """
     self.ctc_input = model.get_layer('ctc').input[0]
     self.input_data = model.get_layer('the_input').input
     self.test_func = K.function(
         [self.input_data, K.learning_phase()], [self.ctc_input])
     self.greedy = greedy
     self.strategy = 'best-path' if greedy else 'beam search'
     self.tokens = get_tokens(language)
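A hedged sketch of how such a decoder instance might be driven. The decode_batch helper is hypothetical, and K.ctc_decode is used here in place of whatever decoding routine the project actually ships; it assumes the 'ctc' layer input holds per-timestep softmax probabilities:

from keras import backend as K

def decode_batch(decoder, x_batch, input_lengths):
    # Hypothetical helper: run the network up to the CTC input layer
    # (0 = test phase for K.learning_phase()), then decode with the
    # strategy chosen in the constructor (best-path vs. beam search).
    y_pred = decoder.test_func([x_batch, 0])[0]
    decoded, _ = K.ctc_decode(y_pred, input_lengths, greedy=decoder.greedy)
    return K.eval(decoded[0])   # integer label sequences, one row per sample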
Example 3
def create_model(target_dir, opt, dropouts, language):
    tokens = get_tokens(language)
    n_labels = len(tokens) + 1  # +1 for blank token!
    print(f'using {n_labels} labels in output layer')

    if args.model_path:
        print(f'trying to load model from {target_dir}')
        if not isdir(args.model_path):
            print(f'ERROR: directory {target_dir} does not exist!', file=sys.stderr)
            exit(1)  # non-zero exit code signals the error
        model = load_keras_model(target_dir, opt)
    else:
        if dropouts:
            print('Creating new model with dropouts')
            model = deep_speech_dropout(n_features=26, n_fc=args.n_fc, n_recurrent=args.n_recurrent, n_labels=n_labels)
        else:
            print('Creating new model without dropouts')
            model = deep_speech_lstm(n_features=26, n_fc=args.n_fc, n_recurrent=args.n_recurrent, n_labels=n_labels)
        model.compile(optimizer=opt, loss=ctc)

    model.summary()

    return model
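A hypothetical call, assuming an Adam optimizer; note that the function also reads args.model_path, args.n_fc and args.n_recurrent from an already-parsed global `args`, which is not shown here:

from keras.optimizers import Adam

# Hypothetical invocation; target_dir, learning rate and the dropout flag
# are placeholder values, not taken from the project.
opt = Adam(lr=1e-4)
model = create_model(target_dir='models/ds_en', opt=opt, dropouts=True, language='en')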
Example 4
 def test_decoding_german(self):
     int_sequence = [6, 27, 18, 28, 5, 18, 0, 29, 0, 0]
     tokens = get_tokens('de')
     decoded = decode(int_sequence, tokens)
     assert_that(decoded, is_('färöer ü'),
                 'leading/trailing spaces should be stripped')
Example 5
 def test_encoding_german(self):
     text = 'färöer ü  '
     tokens = get_tokens('de')
     encoded = encode(text, tokens)
     assert_that(encoded, is_([6, 27, 18, 28, 5, 18, 0, 29]),
                 'leading/trailing spaces should be stripped')
Example 6
 def test_decoding_english(self):
     int_sequence = [6, 15, 15, 0, 2, 1, 18, 28, 28, 0, 0]
     tokens = get_tokens('en')
     decoded = decode(int_sequence, tokens)
     assert_that(decoded, is_('foo bar'),
                 'leading/trailing spaces should be stripped')
Example 7
 def test_encoding_english(self):
     text = 'foo bar  '
     tokens = get_tokens('en')
     encoded = encode(text, tokens)
     assert_that(encoded, is_([6, 15, 15, 0, 2, 1, 18]),
                 'leading/trailing spaces should be stripped')
Example 8
 def test_get_tokens(self):
     assert_that(len(get_tokens('en')), is_(28))
     assert_that(len(get_tokens('de')), is_(30))
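Taken together, the tests in Examples 4-8 pin down most of the token scheme: index 0 is the space, 1-26 are a-z, German appends ä/ö/ü at 27-29, and an index equal to len(tokens) acts as the CTC blank and is dropped on decoding. A minimal sketch consistent with those assertions (not the project's actual implementation; the 28th English token is assumed to be the apostrophe, which the tests do not confirm):

def get_tokens_sketch(lang):
    # space + a-z; German adds the umlauts, English needs one more token
    # (apostrophe assumed here only to reach the tested length of 28)
    tokens = ' abcdefghijklmnopqrstuvwxyz'
    return tokens + 'äöü' if lang == 'de' else tokens + "'"

def encode_sketch(text, tokens):
    # leading/trailing spaces are stripped before mapping characters to indices
    return [tokens.index(c) for c in text.strip()]

def decode_sketch(int_sequence, tokens):
    # indices equal to len(tokens) are the CTC blank and are skipped
    return ''.join(tokens[i] for i in int_sequence if i < len(tokens)).strip()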