# NOTE(review): this physical line is a whitespace-mangled paste — original
# newlines/indentation are lost, so it is kept verbatim below. It fuses three
# logical units:
#   1. The TAIL of a predict-style function (up to the `return np.array(...)`)
#      whose `def` line is outside this view; `answers`, `answer`,
#      `alignments`, `test_source_text`, and `out_words` are all defined in the
#      missing part — presumably it returns (attention alignments, tokenized
#      source words, predicted words); verify against the full file.
#   2. Checkpoint setup: creates 'checkpoints_luong/{encoder,decoder}' dirs if
#      absent, then restores the latest checkpoint via
#      tf.train.latest_checkpoint when one exists. The embedded comment
#      "Uncomment these lines for inference mode" suggests the restore was
#      meant to be optional/commented in training mode — and note that once
#      collapsed onto one line, that `#` comment would swallow everything
#      after it; the original file must have had these on separate lines.
#   3. A truncated training loop (MODE == 'train'): per epoch it initializes
#      encoder states, saves encoder/decoder weights as
#      encoder_{e+1}.h5 / decoder_{e+1}.h5, then iterates all batches
#      (dataset.take(-1) takes the whole dataset) calling train_step.
#      NOTE(review): save_weights runs BEFORE the batch loop, so checkpoint
#      e+1 appears to hold the weights from before that epoch's training —
#      confirm intent; moving the saves after the batch loop looks right.
#      The trailing `if batch % 100 == 0:` has NO body here — the fragment is
#      truncated (the original presumably printed the loss every 100 batches).
answers.append(answer) print(answer) return np.array(alignments), test_source_text.split(' '), out_words if not os.path.exists('checkpoints_luong/encoder'): os.makedirs('checkpoints_luong/encoder') if not os.path.exists('checkpoints_luong/decoder'): os.makedirs('checkpoints_luong/decoder') # Uncomment these lines for inference mode encoder_checkpoint = tf.train.latest_checkpoint('checkpoints_luong/encoder') decoder_checkpoint = tf.train.latest_checkpoint('checkpoints_luong/decoder') if encoder_checkpoint is not None and decoder_checkpoint is not None: encoder.load_weights(encoder_checkpoint) decoder.load_weights(decoder_checkpoint) if MODE == 'train': for e in range(NUM_EPOCHS): en_initial_states = encoder.init_states(BATCH_SIZE) encoder.save_weights( 'checkpoints_luong/encoder/encoder_{}.h5'.format(e + 1)) decoder.save_weights( 'checkpoints_luong/decoder/decoder_{}.h5'.format(e + 1)) for batch, (source_seq, target_seq_in, target_seq_out) in enumerate(dataset.take(-1)): loss = train_step(source_seq, target_seq_in, target_seq_out, en_initial_states) if batch % 100 == 0:
# NOTE(review): whitespace-mangled paste kept verbatim below. Two fused units:
#   1. The TAIL of a predict-style function (through the `return`): a greedy
#      decode loop. Each step feeds the previous predicted token id back in
#      (`tf.argmax(de_output, -1)` wrapped in expand_dims), collects the word
#      via fr_tokenizer.index_word and the attention weights, and stops on
#      '<end>' or after 20 tokens. The joined sentence is appended to a
#      module-level `dists` list (side effect on a global — presumably the
#      distractor-collection list written out below; confirm). `de_input`,
#      `de_state_h`, `de_state_c`, `en_outputs`, and `test_source_text` are
#      initialized in the missing part of the function before this view.
#      Returns (alignments array, source words, predicted words).
#   2. Top-level inference driver: loads encoder/decoder weights from
#      un-numbered 'encoder.h5'/'decoder.h5' (NOTE(review): training saves
#      numbered files encoder_{e}.h5 — verify these un-numbered paths exist),
#      runs predict() over every normalized test sentence with a progress
#      print every 100, then post-processes: `d[:-6]` strips the last 6
#      characters — presumably the trailing ' <end>' token; confirm — wraps
#      each in single quotes, and writes the 'distractor' column of DataFrame
#      `r` to Predictions.csv.
out_words = [] alignments = [] while True: de_output, de_state_h, de_state_c, alignment = decoder( de_input, (de_state_h, de_state_c), en_outputs[0]) de_input = tf.expand_dims(tf.argmax(de_output, -1), 0) out_words.append(fr_tokenizer.index_word[de_input.numpy()[0][0]]) alignments.append(alignment.numpy()) if out_words[-1] == '<end>' or len(out_words) >= 20: break dist = ' '.join(out_words) dists.append(dist) return np.array(alignments), test_source_text.split(' '), out_words encoder.load_weights('checkpoints_luong/encoder/encoder.h5') decoder.load_weights('checkpoints_luong/decoder/decoder.h5') for i, test_sent in enumerate(test_sents): test_sequence = normalize_string(test_sent) alignments, source, prediction = predict(test_sequence) if i % 100 == 0: print(i, ' Predictions completed!') dists = ["'" + d[:-6] + "'" for d in dists] r['distractor'] = dists r.to_csv('Predictions.csv', index=None)
# NOTE(review): whitespace-mangled paste kept verbatim below. Two fused units:
#   1. The BODY of a read_table-style loader (its `def` and the `file`
#      parameter are outside this view; it is called below as
#      read_table("random_embedding.csv")): reads a CSV of floats into a
#      2-D list and casts it to a float32 TF tensor. NOTE(review): the broad
#      `except Exception: return None` silently turns ANY failure (missing
#      file, malformed row, non-numeric cell) into None — callers get no
#      diagnostic; consider narrowing to (OSError, ValueError) and logging.
#      Also note it uses `numpy.asarray` (full-module import spelling) while
#      other fragments in this file use `np.` — confirm both imports exist.
#   2. Chatbot REPL script: loads the random embedding, resolves the Cornell
#      movie-dialog data (vocab_size=800; Decoder is built with
#      vocab_size=804 — presumably 800 + 4 special tokens; confirm), builds
#      Encoder(batch_size=1) and Decoder(batch_size=3) — NOTE(review): the
#      batch-size mismatch looks deliberate for beam width 3 ("Mode: 3 -
#      Beam Search") but verify — restores saved weights, switches the
#      decoder to non-training beam mode, then loops reading user input,
#      wrapping it in the 'TSTSTARTTST'/'TSTEOSTST' sentinel tokens, and
#      printing predict()'s reply until the user types exactly "Exit"
#      (case-sensitive; "Exit" itself still gets one final reply because the
#      loop condition is only checked at the top).
try: with open(file, 'r') as csvfile: reader = csv.reader(csvfile) table = [[float(e) for e in r] for r in reader] table = tf.cast(numpy.asarray(table), dtype=tf.dtypes.float32) return table except Exception: return None embedding = read_table("random_embedding.csv") dialog_list = Dialog.resolve_data("./data/movie_conversations.txt", "./data/movie_lines.txt", vocab_size=800) Dialog.load_word2ids() encoder = Encoder(batch_size=1, units=64, embedding=embedding) decoder = Decoder(batch_size=3, units=64, embedding=embedding, vocab_size=804) encoder.load_weights('encoder_weights_saving') decoder.load_weights('decoder_weights_saving') decoder.training = False decoder.mode = "beam" print("<<< Chatting Bot Demonstration >>>") print("Mode: 3 - Beam Search") val = "" while val != "Exit": val = input("Me: ") inp = str("TSTSTARTTST " + val + " TSTEOSTST") rep = predict(inp) print("Computer: " + rep)