Example #1
def main():
    import json
    import numpy as np
    import tensorflow as tf  # TF1 graph API (variable_scope, ConfigProto)

    option_file_path = 'dump/sentlm_base/options.json'

    with open(option_file_path, 'r') as fin:
        options = json.load(fin)

    # SentenceLanguageModel is not imported here; it is assumed to be
    # defined elsewhere in the surrounding project.
    with tf.variable_scope('lm'):
        model = SentenceLanguageModel(options, is_training=True)

    init = tf.initializers.global_variables()
    # Handles for reading the LSTM state in and out of the graph.
    init_state_tensors = [model.init_lstm_state]
    final_state_tensors = [model.final_lstm_state]

    batch_size = options['batch_size']
    max_seq_length = options['unroll_steps']
    max_chars = options['char_cnn']['max_characters_per_token']

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    sess = tf.InteractiveSession(config=config)
    sess.run(init)
    # Prime the LSTM state with an all-zero character batch.
    feed_dict = {
        model.tokens_characters:
            np.zeros([batch_size, max_seq_length, max_chars], dtype=np.int32)
    }
    init_state_values = sess.run(init_state_tensors, feed_dict=feed_dict)

    from data import SentenceDataset, UnicodeCharsVocabularyPad

    test_prefix = 'data/test/violin_test.txt'
    vocab_path = 'data/vocabulary/vocab_bnc_5.txt'

    vocabulary = UnicodeCharsVocabularyPad(vocab_path,
                                           max_word_length=max_chars)
    dataset = SentenceDataset(test_prefix, vocabulary)
    # Pull a single batch from the dataset iterator.
    batch_iter = dataset.iter_batches(batch_size=batch_size,
                                      seq_length=max_seq_length)
    batch = next(batch_iter)

    feed_dict = {
        model.tokens_characters: batch['tokens_characters'],
        model.seq_length: batch['lengths'],
        model.next_token_id: batch['next_token_id']
    }
    total_loss = sess.run(model.total_loss, feed_dict=feed_dict)
    # Keep per-token losses around for inspection in the embed() shell below.
    losses = sess.run(model.losses, feed_dict=feed_dict)
    print(f'Loss: {total_loss} (should be around 12)')

    # Drop into an interactive shell for inspection, then hard-exit;
    # os._exit skips normal interpreter cleanup.
    from IPython import embed
    embed()
    import os
    os._exit(1)
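
A natural follow-up is to average the loss over every batch in the test file rather than a single one. The sketch below is hypothetical, not part of the original example: it reuses the feed-dict keys shown above and assumes iter_batches is an ordinary generator that simply stops when the file is exhausted and that total_loss is a per-batch scalar. It would run before the embed() call.

    # Hypothetical extension: mean loss across the whole test set.
    batch_losses = []
    for batch in dataset.iter_batches(batch_size=batch_size,
                                      seq_length=max_seq_length):
        feed_dict = {
            model.tokens_characters: batch['tokens_characters'],
            model.seq_length: batch['lengths'],
            model.next_token_id: batch['next_token_id'],
        }
        batch_losses.append(sess.run(model.total_loss, feed_dict=feed_dict))
    print(f'Mean loss over {len(batch_losses)} batches: '
          f'{sum(batch_losses) / len(batch_losses):.4f}')
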
Example #2
def main():
    import json
    import tensorflow as tf  # TF1 graph API (variable_scope, ConfigProto)
    from data import SentenceDataset, VocabularyPad

    option_file_path = 'dump/sentpad_test/options.json'
    test_prefix = 'data/test/violin_test.txt'
    vocab_path = 'data/vocabulary/vocab_bnc_5.txt'

    with open(option_file_path, 'r') as fin:
        options = json.load(fin)

    with tf.variable_scope('lm'):
        model = SentenceLanguageModel(options, is_training=False)

    init = tf.initializers.global_variables()
    batch_size = options['batch_size']
    max_seq_length = options['unroll_steps']

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    sess = tf.InteractiveSession(config=config)
    sess.run(init)
    vocabulary = VocabularyPad(vocab_path)
    dataset = SentenceDataset(test_prefix, vocabulary)
    # Pull a single batch from the dataset iterator.
    batch_iter = dataset.iter_batches(batch_size=batch_size,
                                      seq_length=max_seq_length)
    batch = next(batch_iter)

    feed_dict = {
        model.token_ids: batch['token_ids'],
        model.seq_length: batch['lengths'],
        model.next_token_id: batch['next_token_id']
    }
    total_loss = sess.run(model.total_loss, feed_dict=feed_dict)
    # Keep per-token losses around for inspection in the embed() shell below.
    losses = sess.run(model.losses, feed_dict=feed_dict)
    print(f'Loss: {total_loss} (should be around 12)')

    # Drop into an interactive shell for inspection, then hard-exit;
    # os._exit skips normal interpreter cleanup.
    from IPython import embed
    embed()
    import os
    os._exit(1)
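
The substantive difference between the two examples is the input representation: Example #1 exercises the character-level input path, feeding model.tokens_characters with character IDs built by UnicodeCharsVocabularyPad (padded to max_characters_per_token from the char_cnn options), while Example #2 feeds word-level IDs into model.token_ids via VocabularyPad. The dataset, session setup, and the "around 12" loss sanity check are otherwise the same.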