Exemplo n.º 1
0
def evaluate(args):
    """Restore a saved model and evaluate it on the dev set.

    Loads the pickled vocabulary from ``args.vocab_dir``, builds a
    ``DataLoader`` over ``args.dev_files``, restores the
    ``averaged.ckpt-0`` checkpoint from ``args.model_dir`` and logs the loss
    and the metrics returned by ``Model.evaluate``. ``args.result_dir`` and
    the ``dev.predicted`` prefix are handed to ``Model.evaluate`` for its
    output.

    Args:
        args: Options object. This function reads ``vocab_dir``,
            ``dev_files``, ``max_p_num``, ``max_p_len``, ``max_q_len``,
            ``save_dir``, ``batch_size``, ``epochs``, ``warmup_proportion``,
            ``model_dir`` and ``result_dir``; the whole object is also
            passed on to ``Model``.

    Raises:
        ValueError: If ``args.dev_files`` is empty or ``None``.
    """
    logger = logging.getLogger("QANet")
    logger.info("====== evaluating ======")

    # Validate with an explicit raise rather than ``assert``: assert
    # statements are removed when Python runs with -O. Checking before the
    # vocabulary is loaded makes a bad invocation fail fast.
    if not args.dev_files:
        raise ValueError('No dev files are provided.')

    logger.info('Load data_set and vocab...')
    # NOTE: pickle.load can execute arbitrary code while unpickling; the
    # vocab file must come from a trusted source.
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
        vocab = pickle.load(fin)

    dataloader = DataLoader(args.max_p_num, args.max_p_len, args.max_q_len,
                            args.save_dir, dev_files=args.dev_files)

    # The Model constructor takes the step counts even though no training
    # happens here.
    # NOTE(review): no train files are passed to the DataLoader above, so
    # train_set is presumably empty and both counts are 0 - confirm.
    num_train_steps = int(
        len(dataloader.train_set) / args.batch_size * args.epochs)
    num_warmup_steps = int(num_train_steps * args.warmup_proportion)
    logger.info('Converting text into ids...')
    dataloader.convert_to_ids(vocab)

    logger.info('Restoring the model...')
    model = Model(vocab, num_train_steps, num_warmup_steps, args)
    # NOTE(review): checkpoint name and the dev batch size (16) are
    # hard-coded here rather than taken from args.
    model.restore(args.model_dir, "averaged.ckpt-0")
    logger.info('Evaluating the model on dev set...')
    pad_id = vocab.get_word_id(vocab.pad_token)
    dev_batches = dataloader.gen_mini_batches('dev', 16, pad_id, shuffle=False)

    dev_loss, dev_bleu_rouge = model.evaluate(
        dev_batches, result_dir=args.result_dir, result_prefix='dev.predicted')

    logger.info('Loss on dev set: %s', dev_loss)
    logger.info('Result on dev set: %s', dev_bleu_rouge)
    logger.info('Predicted answers are saved to %s', args.result_dir)
Exemplo n.º 2
0
def train(args):
    """Train a QANet model from scratch.

    Reads the pickled vocabulary from ``args.vocab_dir``, builds a
    ``DataLoader`` over the train and dev files, derives the total and
    warm-up step counts from the training-set size, and then calls
    ``Model.train`` with ``args.model_dir`` as the save directory and
    ``args.algo`` as the save prefix.

    Args:
        args: Options object. This function reads ``vocab_dir``,
            ``max_p_num``, ``max_p_len``, ``max_q_len``, ``save_dir``,
            ``train_files``, ``dev_files``, ``batch_size``, ``epochs``,
            ``warmup_proportion``, ``model_dir``, ``algo`` and ``dropout``;
            the whole object is also passed on to ``Model``.
    """
    log = logging.getLogger("QANet")
    log.info("====== training ======")

    log.info('Load data_set and vocab...')
    vocab_path = os.path.join(args.vocab_dir, 'vocab.data')
    with open(vocab_path, 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)

    data = DataLoader(
        args.max_p_num,
        args.max_p_len,
        args.max_q_len,
        args.save_dir,
        args.train_files,
        args.dev_files,
    )

    # Total optimisation steps: (examples / batch size) * epochs, truncated.
    total_steps = int(len(data.train_set) / args.batch_size * args.epochs)
    warmup_steps = int(total_steps * args.warmup_proportion)

    log.info('Converting text into ids...')
    data.convert_to_ids(vocab)

    log.info('Initialize the model...')
    model = Model(vocab, total_steps, warmup_steps, args)
    # Drop this function's reference to the vocabulary before the training
    # run starts.
    del vocab

    log.info('Training the model...')
    model.train(
        data,
        args.epochs,
        args.batch_size,
        save_dir=args.model_dir,
        save_prefix=args.algo,
        dropout=args.dropout,
    )

    log.info('====== Done with model training! ======')
Exemplo n.º 3
0
def predict(args):
    """Restore a saved model and run it over the test set.

    Loads the pickled vocabulary from ``args.vocab_dir``, builds a
    ``DataLoader`` over ``args.test_files``, restores the ``qanet_64000``
    checkpoint from ``args.model_dir`` and calls ``Model.evaluate`` on the
    test batches with ``args.result_dir`` and the ``test.predicted`` prefix.
    The value returned by ``Model.evaluate`` is discarded.

    Args:
        args: Options object. This function reads ``vocab_dir``,
            ``test_files``, ``max_p_num``, ``max_p_len``, ``max_q_len``,
            ``save_dir``, ``batch_size``, ``epochs``, ``warmup_proportion``,
            ``model_dir`` and ``result_dir``; the whole object is also
            passed on to ``Model``.

    Raises:
        ValueError: If ``args.test_files`` is empty or ``None``.
    """
    logger = logging.getLogger("QANet")
    logger.info("====== predicting ======")

    # Validate with an explicit raise rather than ``assert``: assert
    # statements are removed when Python runs with -O. Checking before the
    # vocabulary is loaded makes a bad invocation fail fast.
    if not args.test_files:
        raise ValueError('No test files are provided.')

    logger.info('Load data_set and vocab...')
    # NOTE: pickle.load can execute arbitrary code while unpickling; the
    # vocab file must come from a trusted source.
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
        vocab = pickle.load(fin)

    dataloader = DataLoader(args.max_p_num, args.max_p_len, args.max_q_len,
                            args.save_dir, test_files=args.test_files)

    # The Model constructor takes the step counts even though no training
    # happens here.
    # NOTE(review): no train files are passed to the DataLoader above, so
    # train_set is presumably empty and both counts are 0 - confirm.
    num_train_steps = int(
        len(dataloader.train_set) / args.batch_size * args.epochs)
    num_warmup_steps = int(num_train_steps * args.warmup_proportion)
    logger.info('Converting text into ids...')
    dataloader.convert_to_ids(vocab)

    logger.info('Restoring the model...')
    model = Model(vocab, num_train_steps, num_warmup_steps, args)
    # NOTE(review): checkpoint name and the test batch size (48) are
    # hard-coded here rather than taken from args.
    model.restore(args.model_dir, 'qanet_64000')
    logger.info('Predicting answers for test set...')
    pad_id = vocab.get_word_id(vocab.pad_token)
    test_batches = dataloader.gen_mini_batches('test', 48, pad_id, shuffle=False)

    model.evaluate(test_batches,
                   result_dir=args.result_dir, result_prefix='test.predicted')