Example #1
        train_loader = datagen(train_encoder_batch,
                               train_decoder_batch, train_target_batch)
        val_loader = datagen(val_encoder_batch,
                             val_decoder_batch, val_target_batch)
    
        model.fit_generator(train_loader,
                            steps_per_epoch=train_steps,
                            epochs=1, verbose=1,
                            validation_data=val_loader,
                            validation_steps=val_steps)

        # At the end of each epoch, decode a batch of misspelled tokens
        # from the validation set to visualize speller performance.
        nb_tokens = 5
        input_tokens, target_tokens, decoded_tokens = decode_sequences(
            val_encoder, val_target, input_ctable, target_ctable,
            maxlen, reverse, encoder_model, decoder_model, nb_tokens,
            sample_mode=sample_mode, random=True)
        
        print('-')
        print('Input tokens:  ', input_tokens)
        print('Decoded tokens:', decoded_tokens)
        print('Target tokens: ', target_tokens)
        print('-')
        
        # Save the full model at the end of each epoch.
        model_file = '_'.join(['seq2seq', 'epoch', str(epoch + 1)]) + '.h5'
        save_dir = 'checkpoints'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        save_path = os.path.join(save_dir, model_file)
        print('Saving full model to {:s}'.format(save_path))
        model.save(save_path)
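
The datagen helper is defined elsewhere in this project; fit_generator only
requires a generator that yields ([encoder_input, decoder_input], target)
batches indefinitely, which is why steps_per_epoch and validation_steps must
be given explicitly. A minimal sketch of such a helper, assuming the three
batch iterators already yield one-hot-encoded numpy arrays and never run dry
(the names here are illustrative, not the project's actual code):

    def datagen(encoder_batches, decoder_batches, target_batches):
        """Yield ([encoder_input, decoder_input], target) tuples forever.

        fit_generator draws steps_per_epoch batches per epoch from this
        stream, so the underlying iterators must cycle indefinitely.
        """
        triples = zip(encoder_batches, decoder_batches, target_batches)
        while True:
            encoder_input, decoder_input, target = next(triples)
            yield [encoder_input, decoder_input], target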
Example #2
    test_word_set = list(filter(None, set(tokenize(test_text))))
    train_max_word_len = max([len(token) for token in word_set]) + 2
    test_max_word_len = max([len(token) for token in test_word_set]) + 2
    train_enc_tokens, train_dec_tokens, _ = prepare_word_tokens(
        word_set, train_max_word_len, ERR_RATE)
    test_enc_tokens, test_dec_tokens, test_target_tokens = prepare_word_tokens(
        test_word_set, test_max_word_len, ERR_RATE)

    enc_charset = set(' '.join(train_enc_tokens))
    dec_charset = set(' '.join(train_dec_tokens))

    enc_oh = OneHotEncoder(enc_charset)
    dec_oh = OneHotEncoder(dec_charset)

    token_count = len(test_enc_tokens)
    right_guess_counter = 0
    counter = 0

    for enc_token, dec_token, target_token in zip(test_enc_tokens,
                                                  test_dec_tokens,
                                                  test_target_tokens):
        _, decoded_token = decode_sequences([enc_token], enc_oh, dec_oh,
                                            train_max_word_len, encoder,
                                            decoder, 1, False)
        #print(f'Decoded: {decoded_token[0]} | Target: {target_token.rstrip("*")}')
        if decoded_token[0] == target_token.rstrip('*'):
            right_guess_counter += 1
        counter += 1
        if counter % 10 == 0:
            print(f'{(counter / token_count) * 100:.3f}% finished')
    print(f'Accuracy: {right_guess_counter / token_count:.3f}')
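
The OneHotEncoder used here is a small character-level codec built from a
charset, not the scikit-learn class of the same name. Its implementation is
not part of the snippet; a plausible minimal version, inferred from how it
is constructed and passed to decode_sequences (treat this as a hedged sketch
rather than the project's actual class):

    import numpy as np

    class OneHotEncoder:
        """Hypothetical sketch: map characters to one-hot rows and back."""

        def __init__(self, charset):
            self.chars = sorted(charset)
            self.char_to_idx = {c: i for i, c in enumerate(self.chars)}

        def encode(self, token, maxlen):
            # One row per character position, one column per symbol.
            x = np.zeros((maxlen, len(self.chars)), dtype=np.float32)
            for i, char in enumerate(token[:maxlen]):
                x[i, self.char_to_idx[char]] = 1.0
            return x

        def decode(self, probs):
            # Greedy: take the most probable character at each position.
            return ''.join(self.chars[int(i)]
                           for i in probs.argmax(axis=-1))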
Example #3
    misspelled_tokens, _, target_tokens = transform(tokens,
                                                    maxlen,
                                                    error_rate=error_rate,
                                                    shuffle=False)

    input_chars = set(' '.join(train_encoder))
    target_chars = set(' '.join(train_decoder))
    input_ctable = CharacterTable(input_chars)
    target_ctable = CharacterTable(target_chars)

    encoder_model, decoder_model = restore_model(model_path, hidden_size)

    input_tokens, target_tokens, decoded_tokens = decode_sequences(
        misspelled_tokens,
        target_tokens,
        input_ctable,
        target_ctable,
        maxlen,
        reverse,
        encoder_model,
        decoder_model,
        nb_tokens,
        sample_mode=sample_mode,
        random=False)

    print('-')
    print('Input sentence:  ', ' '.join(input_tokens))
    print('-')
    print('Decoded sentence:', ' '.join(decoded_tokens))
    print('-')
    print('Target sentence: ', ' '.join(target_tokens))
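
All four examples delegate the actual correction to decode_sequences, whose
body is not shown. Character-level seq2seq inference is conventionally done
greedily: the encoder compresses the misspelled token into its final hidden
state(s), and the decoder then emits one character at a time, feeding each
prediction back in as the next input. A rough sketch for a single token,
assuming a Keras-style encoder/decoder pair and a CharacterTable exposing
encode() and indices_char as in the stock Keras examples; the SOS/EOS
markers are assumptions (example #2 suggests '*' terminates targets) and
must be present in the target charset:

    import numpy as np

    SOS, EOS = '\t', '*'  # assumed start / end-of-sequence markers

    def greedy_decode(token, input_ctable, target_ctable, maxlen,
                      encoder_model, decoder_model):
        # Encode the misspelled token; the encoder's final hidden
        # state(s) seed the decoder.
        x = input_ctable.encode(token, maxlen)[np.newaxis, ...]
        states = encoder_model.predict(x)
        if not isinstance(states, list):
            states = [states]

        char, decoded = SOS, ''
        for _ in range(maxlen):
            # Feed the previous character as a length-1 sequence.
            prev = target_ctable.encode(char, 1)[np.newaxis, ...]
            outputs = decoder_model.predict([prev] + states)
            probs, states = outputs[0], list(outputs[1:])
            char = target_ctable.indices_char[int(probs[0, -1].argmax())]
            if char == EOS:
                break
            decoded += char
        return decoded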
Example #4
DATA_DIR = './data'

if __name__ == '__main__':
    # Prepare model
    encoder, decoder = load_s2s_model(
        'test-no_reverse-hs-512_err-0.8_bs-256_e-30_drop-0.2.h5', HIDDEN_SIZE)

    text = load_text(DATA_DIR)
    word_set = list(filter(None, set(tokenize(text))))
    max_word_len = max([len(token) for token in word_set]) + 2
    train_enc_tokens, train_dec_tokens, _ = prepare_word_tokens(
        word_set, max_word_len, ERR_RATE)

    enc_charset = set(' '.join(train_enc_tokens))
    dec_charset = set(' '.join(train_dec_tokens))
    enc_oh = OneHotEncoder(enc_charset)
    dec_oh = OneHotEncoder(dec_charset)

    # Input decoding loop
    while True:
        sentence = input('\nEnter sentence to decode:\n')
        tokens = list(filter(None, tokenize(sentence)))
        nb_of_tokens = len(tokens)
        prepared_tokens = [get_padded_token(token, max_word_len)
                           for token in tokens]
        input_tokens, decoded_tokens = decode_sequences(
            prepared_tokens, enc_oh, dec_oh, max_word_len, encoder, decoder,
            nb_of_tokens, False)
        print('Decoded sentence:', ' '.join(decoded_tokens))
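
get_padded_token is also external to the snippet. Given that example #2
strips trailing '*' characters from target tokens, the helper presumably
pads each raw token with '*' out to the fixed length the network was
trained on. A one-line sketch under that assumption:

    def get_padded_token(token, max_word_len, pad_char='*'):
        """Pad (or truncate) a token to the encoder's fixed input length;
        '*' is assumed to be the padding / end-of-word marker."""
        return token[:max_word_len].ljust(max_word_len, pad_char)

For example, get_padded_token('helo', 8) would return 'helo****'.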