Example #1
File: major_exec.py  Project: yumoxu/detnet
def test_major_sent(synthese):
    # logger.info('START: model testing...')
    dataset_type = 'synthese' if synthese else 'lead'
    data_loader = pipe.DomDetDataLoader(dataset_type=dataset_type)

    n_iter, total_loss = 0, 0.0
    n_samples, total_hamming = 0, 0.0

    cf_mats, precision_list, recall_list = list(), list(), list()

    for batch_idx, batch in enumerate(data_loader):
        n_iter += 1

        y_true = batch['labels'].cpu().numpy()
        d_batch = len(y_true)
        des_sent_info = batch['des_sent_info'].cpu().numpy()
        n_samples += np.sum(des_sent_info[:, -1])

        # logger.info('batch_size: {0}'.format(y_true.shape[0]))

        if synthese:
            hyp_scores = np.tile(y_pred_vec, (d_batch, 1))  # y_pred_vec is defined at module level (see the note after this example)
            fids = batch['fids'].cpu().numpy()
            eval_args = {
                'hyp_scores': hyp_scores,
                'fids': fids,
                'is_hiernet': True
            }
            eval_res = metrics.metric_eval_for_syn_doc(**eval_args)
        else:
            hyp_scores = np.tile(y_pred_vec, (d_batch, max_n_sents, 1))  # max_n_sents is likewise module-level
            eval_args = {
                'y_true': y_true,
                'hyp_scores': hyp_scores,
                'des_sent_info': des_sent_info,
            }
            eval_res = metrics.metric_eval(**eval_args)

        cf_mats.append(eval_res['cf_mat_list'])
        precision_list.extend(eval_res['precision_list'])
        recall_list.extend(eval_res['recall_list'])
        total_hamming += eval_res['hamming']

    cls_f1, avg_f1 = metrics.compute_f1_with_confusion_mats(cf_mats)
    example_based_f1 = metrics.compute_example_based_f1(
        precision_list=precision_list, recall_list=recall_list)
    hamming = total_hamming / n_samples

    eval_log_info = {
        'example_based_f1': example_based_f1,
        'avg_f1': avg_f1,
        'cls_f1': cls_f1,
        'hamming': hamming,
    }

    res_str = 'example_based_f1: {example_based_f1:.6f}, ' \
              'avg_f1: {avg_f1:.6f}, cls_f1: {cls_f1}, hamming: {hamming:.6f}'

    logger.info(res_str.format(**eval_log_info))
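Note: y_pred_vec and max_n_sents are not defined in this snippet; in the repository they are presumably module-level values for the MAJOR (majority-label) baseline. A minimal sketch of how such a fixed score vector could be built, with hypothetical names n_doms and majority_dom_id:

    import numpy as np

    n_doms = 7            # hypothetical number of domain classes
    majority_dom_id = 0   # hypothetical index of the most frequent domain
    y_pred_vec = np.zeros(n_doms, dtype=np.float32)
    y_pred_vec[majority_dom_id] = 1.0  # full score for the majority domain, zero elsewhere

With this shape, np.tile(y_pred_vec, (d_batch, 1)) yields one identical score row per document.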
Example #2
File: major_exec.py  Project: yumoxu/detnet
def test_model_sent_mturk():
    logger.info('START: testing Baseline [MAJOR] on [MTURK SENTS]')

    data_loader = pipe.DomDetDataLoader(dataset_type='mturk')

    n_iter, total_loss = 0, 0.0
    n_samples, total_hamming = 0, 0.0
    cf_mats, precision_list, recall_list = list(), list(), list()

    for batch_idx, batch in enumerate(data_loader):
        n_iter += 1

        y_true = batch['sent_labels'].cpu().numpy()  # d_batch * max_n_sents * n_doms
        d_batch = len(y_true)
        hyp_scores = np.tile(y_pred_vec, (d_batch, 1))
        # hyp_scores = np.tile(y_pred_vec, (d_batch, max_n_sents, 1))

        n_sents = batch['n_sents'].cpu().numpy()
        n_samples += np.sum(n_sents)

        logger.info('batch_size: {0}'.format(y_true.shape[0]))

        eval_args = {
            'y_true': y_true,
            'hyp_scores': hyp_scores,
            'n_sents': n_sents,
            'is_hiernet': True,
        }
        eval_res = metrics.metric_eval_for_mturk(**eval_args)

        cf_mats.append(eval_res['cf_mat_list'])
        precision_list.extend(eval_res['precision_list'])
        recall_list.extend(eval_res['recall_list'])
        total_hamming += eval_res['hamming']

    cls_f1, avg_f1 = metrics.compute_f1_with_confusion_mats(cf_mats)
    example_based_f1 = metrics.compute_example_based_f1(
        precision_list=precision_list, recall_list=recall_list)
    hamming = total_hamming / n_samples

    eval_log_info = {
        'example_based_f1': example_based_f1,
        'avg_f1': avg_f1,
        'cls_f1': cls_f1,
        'hamming': hamming,
    }

    res_str = 'example_based_f1: {example_based_f1:.6f}, ' \
              'avg_f1: {avg_f1:.6f}, cls_f1: {cls_f1}, hamming: {hamming:.6f}'

    logger.info(res_str.format(**eval_log_info))
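This example applies the same fixed baseline scores to sentence-level MTurk annotations. hyp_scores is tiled once per document (the per-sentence tiling is left commented out), and metrics.metric_eval_for_mturk is called with is_hiernet=True, which presumably tells the metric to broadcast the document-level scores over each document's sentences.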
Example #3
File: major_exec.py  Project: yumoxu/detnet
def test_model_word_mturk(matching_mode=None, corpus='wiki'):
    logger.info('START: model testing on [MTURK WORDS]')

    grain = 'word'
    dataset_type = '-'.join(('mturk', corpus, grain))
    data_loader = pipe.DomDetDataLoader(dataset_type=dataset_type)

    n_samples = 0
    p_list = list()
    r_list = list()

    for batch_idx, batch in enumerate(data_loader):
        # turn vars to numpy arrays
        y_true_sents = batch['sent_labels'].cpu().numpy()  # d_batch * max_n_sents * n_doms
        y_true_words = batch['word_labels'].cpu().numpy()  # d_batch * max_n_sents * max_n_words
        n_sents = batch['n_sents'].cpu().numpy()
        n_words = batch['n_words'].cpu().numpy()
        n_samples += np.sum(n_sents)

        d_batch = len(y_true_sents)
        hyp_scores = np.tile(y_pred_vec, (d_batch, 1))

        logger.info('batch_size: {0}'.format(y_true_words.shape[0]))

        eval_args = {
            'hyp_scores': hyp_scores,
            'y_true_sents': y_true_sents,
            'y_true_words': y_true_words,
            'n_sents': n_sents,
            'n_words': n_words,
            'pred_grain': 'doc',
            'max_alter': True,
            'matching_mode': matching_mode,
        }

        eval_res = metrics_word_eval_binary.metric_eval_for_mturk_words_with_ir(
            **eval_args)

        p_list.extend(eval_res['p_list'])
        r_list.extend(eval_res['r_list'])

    exam_f1 = metrics.compute_example_based_f1(p_list, r_list)
    logger.info('word-eval. exam_f1: {0:.6f}'.format(exam_f1))
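Example #3 evaluates the baseline against word-level MTurk annotations through metrics_word_eval_binary.metric_eval_for_mturk_words_with_ir, passing pred_grain='doc' since the baseline only produces document-level scores. A minimal usage sketch with the visible defaults:

    test_model_word_mturk(corpus='wiki')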
Example #4
File: major_exec.py  Project: yumoxu/detnet
def test_major_doc():
    data_loader = pipe.DomDetDataLoader(dataset_type='test')
    n_iter, total_loss = 0, 0.0
    n_samples, total_hamming = 0, 0.0
    cf_mats, precision_list, recall_list = list(), list(), list()

    for batch_idx, batch in enumerate(data_loader):
        n_iter += 1

        y_true = batch['labels'].cpu().numpy()  # turn vars to numpy arrays
        d_batch = len(y_true)
        y_pred = np.tile(y_pred_vec, (d_batch, 1))

        eval_args = {
            'y_true': y_true,
            'hyp_scores': y_pred,
        }

        n_samples += d_batch
        # logger.info('batch_size: {0}'.format(d_batch))
        eval_res = metrics.metric_eval(**eval_args)

        cf_mats.append(eval_res['cf_mat_list'])
        precision_list.extend(eval_res['precision_list'])
        recall_list.extend(eval_res['recall_list'])
        total_hamming += eval_res['hamming']

    cls_f1, avg_f1 = metrics.compute_f1_with_confusion_mats(cf_mats)
    example_based_f1 = metrics.compute_example_based_f1(
        precision_list=precision_list, recall_list=recall_list)
    hamming = total_hamming / n_samples

    eval_log_info = {
        'example_based_f1': example_based_f1,
        'avg_f1': avg_f1,
        'cls_f1': cls_f1,
        'hamming': hamming,
    }

    res_str = 'example_based_f1: {example_based_f1:.6f}, ' \
              'avg_f1: {avg_f1:.6f}, cls_f1: {cls_f1}, hamming: {hamming:.6f}'

    logger.info(res_str.format(**eval_log_info))
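Example #4 is the document-level variant of the baseline: one tiled score row per document, no per-sentence bookkeeping, and n_samples counts documents (d_batch) rather than sentences.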
Example #5
File: eval_doc.py  Project: yumoxu/detnet
def eval_model(model, phase, save_pred=False, save_gold=False):
    assert phase in ('dev', 'test')
    data_loader = pipe.DomDetDataLoader(dataset_type=phase)
    model.eval()

    n_iter, total_loss = 0, 0.0
    n_samples, total_hamming = 0, 0.0
    cf_mats, precision_list, recall_list = list(), list(), list()

    for batch_idx, batch in enumerate(data_loader):
        n_iter += 1

        c = copy.deepcopy
        feed_dict = c(batch)

        for (k, v) in feed_dict.items():
            feed_dict[k] = Variable(v, requires_grad=False,
                                    volatile=True)  # fix ids and masks

        loss, doc_scores = model(**feed_dict)[:2]
        total_loss += loss.data[0]

        y_true = batch['labels'].cpu().numpy()  # turn vars to numpy arrays
        hyp_scores = doc_scores.data.cpu().numpy()
        eval_args = {
            'y_true': y_true,
            'hyp_scores': hyp_scores,
        }

        if save_pred:
            eval_args['save_pred_to'] = join(path_parser.pred_doc,
                                             config_loader.meta_model_name)

        if save_gold:
            eval_args['save_true_to'] = join(path_parser.pred_doc, 'gold')

        # del model_res
        n_samples += y_true.shape[0]
        # logger.info('batch_size: {0}'.format(y_true.shape[0]))
        eval_res = metrics.metric_eval(**eval_args)

        cf_mats.append(eval_res['cf_mat_list'])
        precision_list.extend(eval_res['precision_list'])
        recall_list.extend(eval_res['recall_list'])
        total_hamming += eval_res['hamming']

    avg_loss = total_loss / n_iter
    cls_f1, avg_f1 = metrics.compute_f1_with_confusion_mats(cf_mats)
    example_based_f1 = metrics.compute_example_based_f1(
        precision_list=precision_list, recall_list=recall_list)
    hamming = total_hamming / n_samples

    eval_log_info = {
        'ph': phase,
        'loss': avg_loss,
        'example_based_f1': example_based_f1,
        'avg_f1': avg_f1,
        'cls_f1': cls_f1,
        'hamming': hamming,
    }

    return eval_log_info
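Unlike the baseline examples, eval_model runs a trained model in evaluation mode and returns the metrics dict instead of logging it, leaving formatting to the caller. The Variable(..., volatile=True) and loss.data[0] idioms date from pre-0.4 PyTorch; in later versions the equivalents are a torch.no_grad() context and loss.item(). A minimal usage sketch (the log format is an assumption, mirroring the baseline examples):

    eval_log_info = eval_model(model, phase='dev')
    logger.info('ph: {ph}, loss: {loss:.6f}, avg_f1: {avg_f1:.6f}, '
                'hamming: {hamming:.6f}'.format(**eval_log_info))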
Example #6
File: eval_word.py  Project: yumoxu/detnet
def test_model_word_mturk_with_checkpoints(model,
                                           matching_mode=None,
                                           corpus='wiki',
                                           save_pred=False,
                                           save_gold=False,
                                           n_iter=None,
                                           restore=False):
    if corpus == 'wiki':
        save_dir = path_parser.pred_mturk_wiki
    elif corpus == 'nyt':
        if lang != 'en':
            raise ValueError('Set lang to en when NYT corpus is used')
        save_dir = path_parser.pred_mturk_nyt
    else:
        raise ValueError('Invalid corpus: {}'.format(corpus))

    if config_loader.placement == 'auto':
        model = nn.DataParallel(model, device_ids=config_loader.device)

    if config_loader.placement in ('auto', 'single'):
        model.cuda()

    logger.info('START: model testing on [MTURK WORDS]')

    checkpoint = join(path_parser.model_save, config_loader.model_name)
    if restore:
        checkpoint = join(checkpoint, 'resume')

    filter_keys = None
    if config_loader.reset_size_for_test and not config_loader.set_sep_des_size:
        logger.info('Filter DES pretrained paras...')
        filter_keys = [
            'module.word_det.des_ids', 'module.word_det.des_sent_mask',
            'module.word_det.des_word_mask'
        ]
    load_checkpoint(checkpoint=checkpoint,
                    model=model,
                    n_iter=n_iter,
                    filter_keys=filter_keys)

    grain = 'word'
    dataset_type = '-'.join(('mturk', corpus, grain))
    data_loader = pipe.DomDetDataLoader(dataset_type=dataset_type)
    model.eval()

    c = copy.deepcopy
    pred_grain = get_model_pred_grain()
    p_list = list()
    r_list = list()
    y_true_sents_list = list()
    n_sents_list = list()

    for batch_idx, batch in enumerate(data_loader):

        feed_dict = c(batch)

        del feed_dict['sent_labels']
        del feed_dict['word_labels']
        del feed_dict['n_sents']
        del feed_dict['n_words']

        for (k, v) in feed_dict.items():
            feed_dict[k] = Variable(v, requires_grad=False,
                                    volatile=True)  # fix ids and masks

        if pred_grain == 'doc':
            _, doc_scores = model(**feed_dict)
            hyp_scores = doc_scores.data.cpu().numpy()
        elif pred_grain == 'sent':
            _, _, sent_scores = model(**feed_dict)
            hyp_scores = sent_scores.data.cpu().numpy()
        elif pred_grain == 'word':
            feed_dict['return_sent_attn'] = True
            feed_dict['return_word_attn'] = True
            _, _, _, word_scores, _, word_attn = model(**feed_dict)

            hyp_scores = word_scores.data.cpu().numpy()  # n_batch * n_sents * n_words * n_doms
        else:
            raise ValueError('Invalid prediction grain: {}'.format(pred_grain))

        # turn vars to numpy arrays
        y_true_sents = batch['sent_labels'].cpu().numpy()  # d_batch * max_n_sents * n_doms
        y_true_words = batch['word_labels'].cpu().numpy()  # d_batch * max_n_sents * max_n_words
        n_sents = batch['n_sents'].cpu().numpy()
        n_words = batch['n_words'].cpu().numpy()

        logger.info('batch_size: {0}'.format(y_true_words.shape[0]))

        eval_args = {
            'hyp_scores': hyp_scores,
            'y_true_sents': y_true_sents,
            'y_true_words': y_true_words,
            'n_sents': n_sents,
            'n_words': n_words,
            'pred_grain': pred_grain,
            'max_alter': True,
            'matching_mode': matching_mode,
        }

        if save_pred:
            fn = '_'.join((grain, config_loader.meta_model_name))
            pred_save_fp = join(save_dir, fn)
            eval_args['save_pred_to'] = pred_save_fp

        if save_gold:
            fn = '_'.join((grain, 'gold'))
            true_save_fp = join(save_dir, fn)
            eval_args['save_true_to'] = true_save_fp

        eval_res = metrics_word_eval_binary.metric_eval_for_mturk_words_with_ir(
            **eval_args)

        p_list.extend(eval_res['p_list'])
        r_list.extend(eval_res['r_list'])
        y_true_sents_list.append(y_true_sents)
        n_sents_list.append(n_sents)

    exam_f1 = metrics.compute_example_based_f1(p_list, r_list)
    logger.info('word-eval. exam_f1: {0:.6f}'.format(exam_f1))

    report_dom_specific_f1(p_list, r_list, y_true_sents_list[0],
                           n_sents_list[0])
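Example #6 is the full model-based word evaluation: it restores a checkpoint (optionally filtering DES-related parameters when the description size is reset for testing), selects the score tensor that matches the model's prediction grain (doc, sent, or word), and reports example-based F1. Note that lang, used in the NYT corpus check, is presumably a module-level config value, and that report_dom_specific_f1 receives only the first batch's sentence labels and counts (y_true_sents_list[0], n_sents_list[0]).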