# Imports assumed for this example; Vocab, Tokenizer, Preprocessor and
# AutoEncoder are project-local classes whose module paths are not shown here.
import pickle

from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

if __name__ == '__main__':
    vocab_file = '../vocab/vocab'
    tokenizer_file = '../tokenizer/src_tokenizer'
    vocab = Vocab(vocab_file, 100000)
    tokenizer = Tokenizer(vocab)
    with open(tokenizer_file, mode='wb') as file:
        pickle.dump(tokenizer, file)
    max_sequence_len = 10
    batch_size = 4
    p = Preprocessor(batch_size, 'data/sentences.txt', tokenizer,
                     max_sequence_len)

    embedding_dim = 50
    hidden_dim = 100
    ae = AutoEncoder(max_sequence_len, vocab.NumIds(), embedding_dim,
                     hidden_dim)
    ae.build_models()
    reducelr_cb = ReduceLROnPlateau(monitor='val_loss',
                                    factor=0.5,
                                    patience=10,
                                    verbose=1,
                                    mode='auto',
                                    epsilon=0.0001,  # renamed to min_delta in newer Keras
                                    cooldown=0,
                                    min_lr=1e-20)
    # `model_weights` (the checkpoint filepath) is assumed to be defined elsewhere.
    checkpoint_cb = ModelCheckpoint(model_weights, period=1)
    earlystopping_cb = EarlyStopping(min_delta=0.0001, patience=10)
    callbacks_list = [reducelr_cb, checkpoint_cb, earlystopping_cb]
    x = p.get_data()[:5000]
    print(len(x))
Example #2

# Imports assumed for this example; EncoderRNN, DecoderRNN, trainEpochs,
# evaluateRandomly and the Vocab/Tokenizer/Preprocessor classes are
# project-local and their module paths are not shown here.
import pickle

import torch

# `use_cuda` is not defined in the original snippet; the usual definition is:
use_cuda = torch.cuda.is_available()

print('-' * 30, 'Starting', '-' * 30)
vocab_file = '../vocab/vocab'
tokenizer_file = '../tokenizer/src_tokenizer'
vocab = Vocab(vocab_file, 100000)
tokenizer = Tokenizer(vocab)
with open(tokenizer_file, mode='wb') as file:
    pickle.dump(tokenizer, file)
max_sequence_len = 100
p = Preprocessor(1, 'data/sentences.txt', tokenizer, max_sequence_len)
data = p.get_data()[:5000]
print('-' * 30, 'Loaded data', '-' * 30)

hidden_size = 256
encoder1 = EncoderRNN(vocab.NumIds(), hidden_size)
decoder1 = DecoderRNN(hidden_size, vocab.NumIds(), 1)

if use_cuda:
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()

trainEpochs(encoder1, decoder1, 5000, p, print_every=100)

######################################################################
#

evaluateRandomly(encoder1, decoder1)

######################################################################
# Visualizing Attention
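# The original section is truncated here. As a minimal sketch (not part of the
# original code), attention weights returned by an attention-equipped decoder
# could be plotted with matplotlib; `show_attention` and its arguments are
# illustrative names, not functions defined by this project.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


def show_attention(input_words, output_words, attentions):
    # attentions: 2-D array of shape (len(output_words), len(input_words))
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions, cmap='bone')
    fig.colorbar(cax)
    # Label each column with an input token and each row with an output token.
    ax.set_xticklabels([''] + input_words, rotation=90)
    ax.set_yticklabels([''] + output_words)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
    plt.show()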