# Example #1
# 0
    # (fragment — enclosing def is outside this view) tail of a prediction
    # routine: record the decoded answer, echo it, and return the attention
    # alignments, the tokenized source sentence, and the generated words.
    answers.append(answer)
    print(answer)
    return np.array(alignments), test_source_text.split(' '), out_words


# Ensure the checkpoint directories exist. exist_ok=True makes this
# idempotent and avoids the check-then-create race of the
# `if not os.path.exists(...): os.makedirs(...)` pattern.
os.makedirs('checkpoints_luong/encoder', exist_ok=True)
os.makedirs('checkpoints_luong/decoder', exist_ok=True)

# Restore the most recent saved weights, if any exist, so training can
# resume (or inference can run) from the last checkpoint.
encoder_checkpoint = tf.train.latest_checkpoint('checkpoints_luong/encoder')
decoder_checkpoint = tf.train.latest_checkpoint('checkpoints_luong/decoder')

# Load only when BOTH sides have a checkpoint, keeping the pair in sync.
if encoder_checkpoint is not None and decoder_checkpoint is not None:
    encoder.load_weights(encoder_checkpoint)
    decoder.load_weights(decoder_checkpoint)

if MODE == 'train':
    for e in range(NUM_EPOCHS):
        # Fresh recurrent states at the start of every epoch.
        en_initial_states = encoder.init_states(BATCH_SIZE)
        # NOTE(review): weights are saved *before* this epoch's training
        # steps run, so checkpoint e+1 actually holds the weights produced
        # by the previous epoch — confirm whether the save was meant to
        # come after the inner batch loop.
        encoder.save_weights(
            'checkpoints_luong/encoder/encoder_{}.h5'.format(e + 1))
        decoder.save_weights(
            'checkpoints_luong/decoder/decoder_{}.h5'.format(e + 1))
        # dataset.take(-1) iterates the entire dataset.
        for batch, (source_seq, target_seq_in,
                    target_seq_out) in enumerate(dataset.take(-1)):
            loss = train_step(source_seq, target_seq_in, target_seq_out,
                              en_initial_states)

            # (truncated in this view: the per-100-batch logging body is
            # cut off by the snippet boundary)
            if batch % 100 == 0:
# Example #2
# 0
    # (fragment — enclosing def is outside this view) greedy decoding loop
    # of a predict routine; `de_input`, the decoder states, and
    # `en_outputs` are prepared above this view.
    out_words = []
    alignments = []

    while True:
        # One decoder step; the attention alignment is returned alongside
        # the new LSTM states.
        de_output, de_state_h, de_state_c, alignment = decoder(
            de_input, (de_state_h, de_state_c), en_outputs[0])
        # Greedy pick: feed the argmax token back in as the next input.
        de_input = tf.expand_dims(tf.argmax(de_output, -1), 0)
        out_words.append(fr_tokenizer.index_word[de_input.numpy()[0][0]])

        alignments.append(alignment.numpy())

        # Stop on the end token, or after a hard cap of 20 generated words.
        if out_words[-1] == '<end>' or len(out_words) >= 20:
            break

    dist = ' '.join(out_words)
    # NOTE(review): appends to a module-level `dists` list as a side
    # effect; downstream code reads `dists` rather than the return value —
    # confirm this coupling is intended.
    dists.append(dist)
    return np.array(alignments), test_source_text.split(' '), out_words


# Inference: restore trained weights, run prediction over every test
# sentence, and dump the collected distractor strings to CSV.
encoder.load_weights('checkpoints_luong/encoder/encoder.h5')
decoder.load_weights('checkpoints_luong/decoder/decoder.h5')

for i, test_sent in enumerate(test_sents):
    test_sequence = normalize_string(test_sent)
    # predict() also appends its joined output words to the module-level
    # `dists` list; the returned values are unused here.
    alignments, source, prediction = predict(test_sequence)
    if i % 100 == 0:
        print(i, ' Predictions completed!')

# Strip the trailing ' <end>' marker (6 characters) and quote each
# distractor string.
dists = ["'" + d[:-6] + "'" for d in dists]
r['distractor'] = dists
# index=False is the documented way to omit the row index (the original
# `index=None` only worked because None happens to be falsy).
r.to_csv('Predictions.csv', index=False)
# Example #3
# 0
    # (fragment — enclosing def of read_table is outside this view) parse
    # a CSV file into a matrix of floats and return it as a float32 tensor.
    try:
        with open(file, 'r') as csvfile:
            reader = csv.reader(csvfile)
            table = [[float(e) for e in r] for r in reader]
            table = tf.cast(numpy.asarray(table), dtype=tf.dtypes.float32)
            return table
    # NOTE(review): any failure (missing file, malformed number, ragged
    # rows, ...) is silently mapped to None — callers must handle a None
    # return; consider narrowing to (OSError, ValueError) and logging.
    except Exception:
        return None


# --- Chat-bot demonstration --------------------------------------------
# Load the pretrained embedding table and the movie-dialog vocabulary,
# restore the trained seq2seq weights, and run an interactive beam-search
# chat loop until the user types "Exit".
embedding = read_table("random_embedding.csv")
dialog_list = Dialog.resolve_data("./data/movie_conversations.txt",
                                  "./data/movie_lines.txt",
                                  vocab_size=800)
Dialog.load_word2ids()
# NOTE(review): encoder uses batch_size=1 while decoder uses batch_size=3
# — presumably the beam width; confirm this matches the Decoder contract.
encoder = Encoder(batch_size=1, units=64, embedding=embedding)
decoder = Decoder(batch_size=3, units=64, embedding=embedding, vocab_size=804)
encoder.load_weights('encoder_weights_saving')
decoder.load_weights('decoder_weights_saving')
decoder.training = False
decoder.mode = "beam"

print("<<< Chatting Bot Demonstration >>>")
print("Mode: 3 - Beam Search")
while True:
    val = input("Me: ")
    # Fix: test the sentinel *before* predicting, so typing "Exit" quits
    # immediately. The original `while val != "Exit"` loop still ran
    # predict() and printed a reply for "Exit" before exiting.
    if val == "Exit":
        break
    # Wrap the user input in the model's start/end-of-sentence tokens.
    inp = str("TSTSTARTTST " + val + " TSTEOSTST")
    rep = predict(inp)
    print("Computer: " + rep)