Example #1
# Imports for Caffe2's seq2seq training example; gen_batches and
# Seq2SeqModelCaffe2 live in the same module as this function.
import logging

from caffe2.python import core, workspace
from caffe2.python.models.seq2seq import seq2seq_util

logger = logging.getLogger(__name__)


def run_seq2seq_model(args, model_params=None):
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    batches = gen_batches(args.source_corpus, args.target_corpus, source_vocab,
                          target_vocab, model_params['batch_size'],
                          args.max_length)
    logger.info('Number of training batches {}'.format(len(batches)))

    batches_eval = gen_batches(args.source_corpus_eval, args.target_corpus_eval,
                               source_vocab, target_vocab,
                               model_params['batch_size'], args.max_length)
    logger.info('Number of eval batches {}'.format(len(batches_eval)))

    with Seq2SeqModelCaffe2(
        model_params=model_params,
        source_vocab_size=len(source_vocab),
        target_vocab_size=len(target_vocab),
        num_gpus=args.num_gpus,
        num_cpus=20,
    ) as model_obj:
        model_obj.initialize_from_scratch()
        for i in range(args.epochs):
            logger.info('Epoch {}'.format(i))
            total_loss = 0
            for batch in batches:
                total_loss += model_obj.step(
                    batch=batch,
                    forward_only=False,
                )
            logger.info('\ttraining loss {}'.format(total_loss))
            # Evaluation pass: forward_only=True computes the loss
            # without updating the model parameters.
            total_loss = 0
            for batch in batches_eval:
                total_loss += model_obj.step(
                    batch=batch,
                    forward_only=True,
                )
            logger.info('\teval loss {}'.format(total_loss))
            if args.checkpoint is not None:
                checkpoint_path = '{0}-{1}'.format(args.checkpoint, i)
                # Persist every model parameter into an epoch-stamped
                # minidb file via Caffe2's Save operator.
                assert workspace.RunOperatorOnce(core.CreateOperator(
                    'Save',
                    model_obj.model.GetAllParams(),
                    [],
                    absolute_path=True,
                    db=checkpoint_path,
                    db_type='minidb',
                ))
                logger.info('Model saved to ' + checkpoint_path)
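Example #1 never shows how args and model_params are built. A minimal, hypothetical driver is sketched below: every flag mirrors an attribute the function actually reads, the corpus paths and defaults are placeholders, and batch_size is the only model_params key the snippet references directly (Seq2SeqModelCaffe2 consumes the rest).

import argparse

parser = argparse.ArgumentParser(description='Caffe2 seq2seq training (sketch)')
parser.add_argument('--source-corpus', default='train.src')
parser.add_argument('--target-corpus', default='train.tgt')
parser.add_argument('--source-corpus-eval', default='eval.src')
parser.add_argument('--target-corpus-eval', default='eval.tgt')
parser.add_argument('--unk-threshold', type=int, default=50)
parser.add_argument('--max-length', type=int, default=30)
parser.add_argument('--num-gpus', type=int, default=0)
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--checkpoint', default=None)
args = parser.parse_args()

# batch_size is the only key read directly by run_seq2seq_model; a real
# run would add the model-specific keys Seq2SeqModelCaffe2 expects.
run_seq2seq_model(args, model_params=dict(batch_size=32))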
Example #2
# Imports and logger setup as in Example #1.
def run_seq2seq_model(args, model_params=None):
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    batches = gen_batches(args.source_corpus, args.target_corpus, source_vocab,
                          target_vocab, model_params['batch_size'],
                          args.max_length)
    logger.info('Number of training batches {}'.format(len(batches)))

    batches_eval = gen_batches(args.source_corpus_eval, args.target_corpus_eval,
                               source_vocab, target_vocab,
                               model_params['batch_size'], args.max_length)
    logger.info('Number of eval batches {}'.format(len(batches_eval)))

    with Seq2SeqModelCaffe2(
        model_params=model_params,
        source_vocab_size=len(source_vocab),
        target_vocab_size=len(target_vocab),
        num_gpus=args.num_gpus,
        num_cpus=20,
    ) as model_obj:
        model_obj.initialize_from_scratch()
        for i in range(args.epochs):
            logger.info('Epoch {}'.format(i))
            total_loss = 0
            for batch in batches:
                total_loss += model_obj.step(
                    batch=batch,
                    forward_only=False,
                )
            logger.info('\ttraining loss {}'.format(total_loss))
            total_loss = 0
            for batch in batches_eval:
                total_loss += model_obj.step(
                    batch=batch,
                    forward_only=True,
                )
            logger.info('\teval loss {}'.format(total_loss))
            if args.checkpoint is not None:
                model_obj.save(args.checkpoint, i)
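Example #2 pushes checkpointing behind model_obj.save instead of building the operator inline. That method is not shown here, but a hypothetical implementation with the same signature, written to match the inline Save operator call from Example #1, could look like:

from caffe2.python import core, workspace

class Seq2SeqModelCaffe2:  # excerpt; hypothetical save method
    def save(self, checkpoint_prefix, epoch):
        # Same Save-operator pattern as the inline version in Example #1:
        # dump all parameters into an epoch-stamped minidb file.
        checkpoint_path = '{0}-{1}'.format(checkpoint_prefix, epoch)
        assert workspace.RunOperatorOnce(core.CreateOperator(
            'Save',
            self.model.GetAllParams(),
            [],
            absolute_path=True,
            db=checkpoint_path,
            db_type='minidb',
        ))
        logger.info('Model saved to ' + checkpoint_path)
        return checkpoint_path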
Example #3
# Imports for Caffe2's seq2seq translation example; the
# Seq2SeqModelCaffe2EnsembleDecoder class lives in the same module.
import logging
import sys

from future.utils import viewitems

from caffe2.python.models.seq2seq import seq2seq_util

logger = logging.getLogger(__name__)


def run_seq2seq_beam_decoder(args, model_params, decoding_params):
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    inversed_target_vocab = {v: k for (k, v) in viewitems(target_vocab)}
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    decoder = Seq2SeqModelCaffe2EnsembleDecoder(
        translate_params=dict(
            ensemble_models=[dict(
                source_vocab=source_vocab,
                target_vocab=target_vocab,
                model_params=model_params,
                model_file=args.checkpoint,
            )],
            decoding_params=decoding_params,
        ),
    )
    decoder.load_models()

    for line in sys.stdin:
        numerized_source_sentence = seq2seq_util.get_numberized_sentence(
            line,
            source_vocab,
        )
        translation, alignment, _ = decoder.decode(
            numerized_source_sentence,
            # Cap the decoded length at about twice the source length.
            2 * len(numerized_source_sentence) + 5,
        )
        print(' '.join([inversed_target_vocab[tid] for tid in translation]))
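The decode loop hinges on two mappings: the token-to-id dict returned by gen_vocab for numberizing input, and its inversion for rendering the output ids. A toy round trip, where the vocabulary contents are illustrative stand-ins rather than what gen_vocab actually emits:

# Toy stand-in for the dict produced by seq2seq_util.gen_vocab.
target_vocab = {'<UNK>': 0, '<EOS>': 1, 'hello': 2, 'world': 3}
inversed_target_vocab = {v: k for (k, v) in target_vocab.items()}

translation = [2, 3, 1]  # ids as emitted by decoder.decode(...)
print(' '.join(inversed_target_vocab[tid] for tid in translation))
# hello world <EOS>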
Example #4
# Imports and logger setup as in Example #3.
def run_seq2seq_beam_decoder(args, model_params, decoding_params):
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    inversed_target_vocab = {v: k for (k, v) in viewitems(target_vocab)}
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    decoder = Seq2SeqModelCaffe2EnsembleDecoder(
        translate_params=dict(
            ensemble_models=[
                dict(
                    source_vocab=source_vocab,
                    target_vocab=target_vocab,
                    model_params=model_params,
                    model_file=args.checkpoint,
                )
            ],
            decoding_params=decoding_params,
        ),
    )
    decoder.load_models()

    for line in sys.stdin:
        numerized_source_sentence = seq2seq_util.get_numberized_sentence(
            line,
            source_vocab,
        )
        translation, alignment, _ = decoder.decode(
            numerized_source_sentence,
            # Cap the decoded length at about twice the source length.
            2 * len(numerized_source_sentence) + 5,
        )
        print(' '.join([inversed_target_vocab[tid] for tid in translation]))
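As with training, the surrounding driver is not shown. A hypothetical entry point that reads one source sentence per line from stdin might be wired up as below; the flag names mirror the attributes the function reads, and the empty dicts are placeholders for values that must match the trained checkpoint.

# Usage sketch: cat input.src | python translate_sketch.py --checkpoint model-9
import argparse

parser = argparse.ArgumentParser(description='Caffe2 seq2seq decoding (sketch)')
parser.add_argument('--source-corpus', default='train.src')
parser.add_argument('--target-corpus', default='train.tgt')
parser.add_argument('--unk-threshold', type=int, default=50)
parser.add_argument('--checkpoint', required=True)
args = parser.parse_args()

model_params = {}     # must match the parameters the checkpoint was trained with
decoding_params = {}  # beam search settings consumed by the ensemble decoder

run_seq2seq_beam_decoder(args, model_params, decoding_params)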