Example #1
    # Snippet from a class initializer; assumes `import os, pickle`,
    # `import numpy as np`, `import torch`, and a QAModel class in scope.
    def __init__(self, model_file, word_embeddings_cache_file, stopwords_file,
                 word2dfs_file):
        # fix random seeds (torch and numpy) for reproducibility
        torch.manual_seed(1234)
        np.random.seed(1234)

        # load the trained model
        self.model = QAModel.load('', model_file)
        # load cached word embeddings; returns the embedding dimensionality
        self.vec_dim = self._preload_cached_embeddings(
            word_embeddings_cache_file)
        # random vector used for out-of-vocabulary terms
        self.unk_term_vec = np.random.uniform(-0.25, 0.25, self.vec_dim)

        # stopword list, one word per line
        with open(stopwords_file) as f:
            self.stoplist = {line.strip() for line in f}

        # per-word document frequencies (optional)
        if os.path.isfile(word2dfs_file):
            with open(word2dfs_file, "rb") as w2dfin:
                self.word2dfs = pickle.load(w2dfin)
        else:
            self.word2dfs = None  # avoid AttributeError when the file is absent
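
A minimal usage sketch for the initializer above, assuming the enclosing class is called QAEvaluator (the class name is cut off in the snippet) and that all four paths point at real files:

# `QAEvaluator` and the four file paths are hypothetical, for illustration only
qa = QAEvaluator('model.pt', 'embeddings.cache', 'stopwords.txt', 'word2dfs.p')
print(qa.vec_dim)  # embedding dimensionality inferred from the cache file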
Example #2
                # (snippet begins mid-loop, inside the branch that just saved a new best model)
                logger.info('Achieved better dev_map ... saved model')

            if args.test_on_each_epoch:
                test_scores = trainer.test(test_set, args.batch_size)
                test_map, test_mrr = compute_map_mrr(args.dataset_folder,
                                                     test_set, test_scores)
                logger.info("------- MAP {}, MRR {}".format(test_map, test_mrr))

            # early stopping: give up after `patience` epochs without improvement
            if (i - best_model) >= args.patience:
                logger.warning('No improvement in the last {} epochs. Stopping training'
                               .format(i - best_model))
                break

        logger.info(' ------------ Training epochs completed! ------------')
        logger.info('Best dev MAP in training phase = {:.4f}'.format(best_map))

    # reload the best checkpoint and evaluate it on the held-out splits
    trained_model = QAModel.load(args.model_outfile)
    evaluator = Trainer(trained_model, args.eta, args.mom, args.no_loss_reg,
                        vec_dim, args.cuda)

    for split in [test_set, dev_set]:
        evaluator.load_input_data(args.dataset_folder, cache_file, None, None,
                                  split)
        if args.paper_ext_feats or args.paper_ext_feats_stem:
            # swap in the externally computed features for this split
            evaluator.data_splits[split][-1] = ext_feats_for_splits[split]
            # set_external_features_as_per_paper(evaluator)
        split_scores = evaluator.test(split, args.batch_size)
        split_map, split_mrr = compute_map_mrr(args.dataset_folder, split,
                                               split_scores,
                                               args.run_name_prefix)
        logger.info("------- {} MAP {}, MRR {}".format(split, split_map, split_mrr))
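
The compute_map_mrr calls above reduce per-question candidate scores to two ranking metrics. As a reference point only (this is an independent sketch of the metric definitions, not the repo's compute_map_mrr, whose signature takes dataset paths and scores), MAP and MRR can be computed from 0/1 relevance lists like this:

def map_mrr(ranked_labels):
    """ranked_labels: one list of 0/1 relevance labels per question,
    ordered by descending model score."""
    aps, rrs = [], []
    for labels in ranked_labels:
        if 1 not in labels:
            continue  # questions with no relevant candidate are skipped
        hits, precisions = 0, []
        for rank, rel in enumerate(labels, start=1):
            if rel:
                hits += 1
                precisions.append(hits / rank)  # precision at each relevant rank
        aps.append(sum(precisions) / hits)      # average precision
        rrs.append(1.0 / (labels.index(1) + 1)) # reciprocal rank of first hit
    return sum(aps) / len(aps), sum(rrs) / len(rrs)

# e.g. map_mrr([[0, 1, 1], [1, 0, 0]]) returns (0.7917, 0.75)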
Example #3
        
        # (snippet begins inside the per-epoch training loop, right after scoring the dev set)
        dev_map, dev_mrr = compute_map_mrr(args.dataset_folder, dev_set, dev_scores)
        logger.info("------- MAP {}, MRR {}".format(dev_map, dev_mrr))

        if dev_map - best_map > 1e-3:  # dev MAP improved beyond a 1e-3 tolerance
            best_model = i
            best_map = dev_map

            QAModel.save(net, args.dataset_folder, args.model_fname)
            logger.info('Achieved better dev_map ... saved model')

        if args.test_on_each_epoch:
            test_scores = trainer.test(test_set, args.batch_size)
            test_map, test_mrr = compute_map_mrr(args.dataset_folder, test_set, test_scores)
            logger.info("------- MAP {}, MRR {}".format(test_map, test_mrr))

        # early stopping: give up after `patience` epochs without improvement
        if (i - best_model) >= args.patience:
            logger.warning('No improvement in the last {} epochs. Stopping training'.format(i - best_model))
            break

    logger.info(' ------------ Training epochs completed! ------------')
    logger.info('Best MAP in training phase = {:.4f}'.format(best_map))

    # reload the best checkpoint and evaluate it on the test split
    trained_model = QAModel.load(args.dataset_folder, args.model_fname)
    evaluator = Trainer(trained_model, args.eta, args.mom, args.no_loss_reg, vec_dim)
    evaluator.load_input_data(args.dataset_folder, cache_file, None, None, test_set)
    test_scores = evaluator.test(test_set, args.batch_size)

    test_map, test_mrr = compute_map_mrr(args.dataset_folder, test_set, test_scores)
    logger.info("------- MAP {}, MRR {}".format(test_map, test_mrr))
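
Examples #2 and #3 share the same keep-the-best / early-stopping pattern around their epoch loop. A minimal self-contained sketch of just that pattern, where evaluate_dev and save_checkpoint are placeholders rather than the repo's API:

best_map, best_epoch, patience = 0.0, 0, 5   # illustrative values
for epoch in range(1, 51):
    dev_map = evaluate_dev(epoch)            # placeholder: score the dev set
    if dev_map - best_map > 1e-3:            # improved beyond the tolerance
        best_map, best_epoch = dev_map, epoch
        save_checkpoint(epoch)               # placeholder: QAModel.save(...)
    if epoch - best_epoch >= patience:       # no improvement for `patience` epochs
        break                                # early stopping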