import math
import random
import sys
from collections import defaultdict

# `Graph` follows the primitiv toolkit's API (Graph.set_default, g.clear(),
# loss.backward(), ...), which is what this example appears to use.
from primitiv import Graph

# Helper functions (make_vocab, make_inv_vocab, line_to_sent, load_corpus,
# load_corpus_ref, count_labels, make_batch, test_batch, get_bleu_stats,
# calculate_bleu, save_ppl) and the corpus/size constants (SRC_TRAIN_FILE,
# SRC_VOCAB_SIZE, MAX_EPOCH, BATCH_SIZE, ...) are defined elsewhere in the
# full script.


def test(encdec):
    # Loads vocab.
    src_vocab = make_vocab(SRC_TRAIN_FILE, SRC_VOCAB_SIZE)
    trg_vocab = make_vocab(TRG_TRAIN_FILE, TRG_VOCAB_SIZE)
    inv_trg_vocab = make_inv_vocab(trg_vocab)

    for line in sys.stdin:
        trg_ids = test_batch(
            encdec, src_vocab, trg_vocab,
            [line_to_sent(line.strip(), src_vocab)])[0]
        # Prints the result.
        print(" ".join(inv_trg_vocab[wid] for wid in trg_ids))
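# Usage sketch (hypothetical command line; the real CLI wiring belongs to the
# full script's entry point, not this excerpt):
#
#   $ echo "i saw a girl with a telescope ." | python encdec.py test
#
# test() translates stdin line by line: each line is turned into a word-ID
# sequence by line_to_sent(), decoded by test_batch(), and mapped back to
# surface words through inv_trg_vocab.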
def train(encdec, optimizer, prefix, best_valid_ppl):
    # Registers all parameters to the optimizer.
    optimizer.add_model(encdec)

    # Loads vocab.
    src_vocab = make_vocab(SRC_TRAIN_FILE, SRC_VOCAB_SIZE)
    trg_vocab = make_vocab(TRG_TRAIN_FILE, TRG_VOCAB_SIZE)
    inv_trg_vocab = make_inv_vocab(trg_vocab)
    print("#src_vocab:", len(src_vocab))
    print("#trg_vocab:", len(trg_vocab))

    # Loads all corpora.
    train_src_corpus = load_corpus(SRC_TRAIN_FILE, src_vocab)
    train_trg_corpus = load_corpus(TRG_TRAIN_FILE, trg_vocab)
    valid_src_corpus = load_corpus(SRC_VALID_FILE, src_vocab)
    valid_trg_corpus = load_corpus(TRG_VALID_FILE, trg_vocab)
    test_src_corpus = load_corpus(SRC_TEST_FILE, src_vocab)
    test_ref_corpus = load_corpus_ref(REF_TEST_FILE, trg_vocab)
    num_train_sents = len(train_trg_corpus)
    num_valid_sents = len(valid_trg_corpus)
    num_test_sents = len(test_ref_corpus)
    num_train_labels = count_labels(train_trg_corpus)
    num_valid_labels = count_labels(valid_trg_corpus)
    print("train:", num_train_sents, "sentences,", num_train_labels, "labels")
    print("valid:", num_valid_sents, "sentences,", num_valid_labels, "labels")

    # Sentence IDs.
    train_ids = list(range(num_train_sents))
    valid_ids = list(range(num_valid_sents))

    # Train/valid loop.
    for epoch in range(MAX_EPOCH):
        # Computation graph.
        g = Graph()
        Graph.set_default(g)

        print("epoch %d/%d:" % (epoch + 1, MAX_EPOCH))
        print(" learning rate scale = %.4e"
              % optimizer.get_learning_rate_scaling())

        # Shuffles train sentence IDs.
        random.shuffle(train_ids)

        # Training.
        train_loss = 0.
        for ofs in range(0, num_train_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            batch_ids = train_ids[ofs:min(ofs + BATCH_SIZE, num_train_sents)]
            src_batch = make_batch(train_src_corpus, batch_ids, src_vocab)
            trg_batch = make_batch(train_trg_corpus, batch_ids, trg_vocab)

            g.clear()
            encdec.encode(src_batch, True)
            loss = encdec.loss(trg_batch, True)
            train_loss += loss.to_float() * len(batch_ids)

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

        train_ppl = math.exp(train_loss / num_train_labels)
        print(" train PPL = %.4f" % train_ppl)

        # Validation.
        valid_loss = 0.
        for ofs in range(0, num_valid_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            batch_ids = valid_ids[ofs:min(ofs + BATCH_SIZE, num_valid_sents)]
            src_batch = make_batch(valid_src_corpus, batch_ids, src_vocab)
            trg_batch = make_batch(valid_trg_corpus, batch_ids, trg_vocab)

            g.clear()
            encdec.encode(src_batch, False)
            loss = encdec.loss(trg_batch, False)
            valid_loss += loss.to_float() * len(batch_ids)

        valid_ppl = math.exp(valid_loss / num_valid_labels)
        print(" valid PPL = %.4f" % valid_ppl)

        # Calculates test BLEU.
        stats = defaultdict(int)
        for ofs in range(0, num_test_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            src_batch = test_src_corpus[ofs:min(ofs + BATCH_SIZE, num_test_sents)]
            ref_batch = test_ref_corpus[ofs:min(ofs + BATCH_SIZE, num_test_sents)]

            hyp_ids = test_batch(encdec, src_vocab, trg_vocab, src_batch)
            for hyp_line, ref_line in zip(hyp_ids, ref_batch):
                for k, v in get_bleu_stats(ref_line[1:-1], hyp_line).items():
                    stats[k] += v

        bleu = calculate_bleu(stats)
        print(" test BLEU = %.2f" % (100 * bleu))

        # Saves best model/optimizer.
        if valid_ppl < best_valid_ppl:
            best_valid_ppl = valid_ppl
            print(" saving model/optimizer ... ", end="")
            sys.stdout.flush()
            encdec.save(prefix + ".model")
            optimizer.save(prefix + ".optimizer")
            save_ppl(prefix + ".valid_ppl", best_valid_ppl)
            print("done.")
        else:
            # Learning rate decay by 1/sqrt(2).
            new_scale = .7071 * optimizer.get_learning_rate_scaling()
            optimizer.set_learning_rate_scaling(new_scale)