Example no. 1
0
            ### Validation
            # Periodic validation pass: run the model on the held-out set,
            # score with BLEU (both argmax and beam search), and track
            # improvement for early stopping / checkpointing.
            if conf.do_validate and batch_count % conf.valid_freq == 0:
                logging.info("VALIDATION: Feed-forward pass.")
                # Do the predictions (timed for the log line below)
                tv = time.time()
                y_pred_dict = model.predict({'input': X_val})
                logging.info("VALIDATION: Feed-forward pass took %.2f seconds." % (time.time() - tv))

                # TODO: Numpy should have a solution for this
                # Gather the per-timestep outputs ('output_0', 'output_1', ...)
                # into the pre-allocated y_pred_val array, one time step per
                # column index i.
                for i in range(X_val.shape[1]):
                    y_pred_val[:, i, :] = y_pred_dict['output_%d' % i]

                # Take argmax of output machines and report BLEU
                one_best = get_1best_translations(y_pred_dict, c.trg_idx_to_sent)
                one_best_bleu = scorer.score_sentences(y_val_strings, one_best)
                logging.info("VALIDATION: Direct 1-best (argmax) %s" % one_best_bleu)

                # Beam search + 1-best + BLEU
                logging.info("VALIDATION: Starting beam search...")
                d = decoder.process(y_pred_val)

                # Take 1-best of beam-search and report BLEU
                # NOTE(review): this assumes sent[0] is the top hypothesis of
                # each sentence and sent[0][0] its token-index sequence —
                # confirm against Decoder.process's return format.
                hyps = [c.trg_idx_to_sent(sent[0][0]) for sent in d]
                bleu = scorer.score_sentences(y_val_strings, hyps)
                logging.info("VALIDATION: 1-best of %d-best %s" % (conf.beam_size, bleu))

                # If BLEU is better, save the weights
                # (the save itself presumably follows below this fragment)
                if bleu > best_val_bleu:
                    # Reset patience counter
                    no_val_improvement = 0
Example no. 2
0
    # Load vocabulary files and invert them
    print "Loading vocabulary files..."
    src_vocab = load_object(args.source_vocab)
    trg_vocab = load_object(args.target_vocab)
    corp.set_src_vocab(src_vocab)
    corp.set_trg_vocab(trg_vocab)
    print "Completed."

    # Map source sentences to vocabulary indices (capped at
    # conf.max_seq_len); optionally skip unknown tokens.
    mapped_sents = corp.map_sentences(src_sents, corp.src_vocab, conf.max_seq_len, skip_unk=args.remove_unk)
    print "%d source sentences mapped." % len(mapped_sents)

    # Create decoder instance
    # (end-of-sentence id, unk id, beam width, whether to emit <unk>)
    decoder = Decoder(trg_vocab["</s>"], trg_vocab["<unk>"], args.beamsize, args.generate_unk)

    # Validation prediction placeholder:
    # shape (n_sentences, seq_len, target_vocab_size), float32.
    y_pred = np.ndarray((mapped_sents.shape[0], mapped_sents.shape[1], len(trg_vocab))).astype(np.float32)
    # Feed-forward pass
    y_pred_dict = model.predict({'input': mapped_sents})

    # Collect the per-timestep outputs ('output_0', 'output_1', ...) into
    # the 3-D prediction array, one time step per column index i.
    for i in range(y_pred.shape[1]):
        y_pred[:, i, :] = y_pred_dict['output_%d' % i]

    scorer = MultiBleuScorer()
    d = decoder.process(y_pred)
    # Generate 1-best sentences
    # NOTE(review): d[0] appears to take only the FIRST sentence's beam;
    # the analogous validation code elsewhere iterates all of d
    # ([... for sent in d] with sent[0][0]). Verify this isn't scoring a
    # single sentence against the whole reference file.
    hyps = [corp.trg_idx_to_sent(hyp[0]) for hyp in d[0]]
    bleu = scorer.score_sentences(ref_trans_file, hyps)
    print bleu