Пример #1
0
                orig_len += len(para["context"])

        assert orig_len == len(exp3_source["super_context"]
                               ), "the summed context lengths should line up"

        fancyprint("Testing that there are the same number of answers")

        orig_ans_count, exp3_ans_count = 0, 0

        for article in tqdm(orig_source["data"]):
            for para in article["paragraphs"]:
                for qas in para["qas"]:
                    for answer in qas["answers"]:
                        orig_ans_count += 1

        for topic in tqdm(exp3_source["data"]):
            for qas in topic["qas"]:
                for answer in qas["answers"]:
                    exp3_ans_count += 1

        assert orig_ans_count == exp3_ans_count, "the answer counts should line up"


if __name__ == "__main__":
    # Build the data configuration once; both checks below read from it.
    data = config.data()

    # Check the exp2 transform against the original training data.
    test_exp2_data_transform(
        orig_data=data.train_data_orig,
        transformed_data=data.train_data_exp2,
    )

    # Check the exp3 transform against the same original training data.
    test_exp3_data_transform(
        orig_data=data.train_data_orig,
        transformed_data=data.train_data_exp3,
    )

    # Emit a trailing blank line after the checks have run.
    print()
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                           starts.tolist(), ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict


if __name__ == '__main__':
    # Script entry point: hand the data configuration and the raw
    # command-line arguments to main().
    main(
        data=config.data(),
        flags=sys.argv,
    )
Пример #3
0
def geConfig(classes, trainPath, validPath, namesPath, batchs):
    """Write the ``voc.data`` and ``voc.cfg`` files under ``join(wd, sets)``.

    The file contents are produced by ``config.data`` and ``config.cfg``;
    this function only forwards its arguments and writes the results.
    ``wd`` and ``sets`` are read from the enclosing module scope
    (presumably the working directory and a sub-directory name -- confirm
    against the rest of the file).

    Args:
        classes: Forwarded to both ``config.data`` and ``config.cfg``.
        trainPath: Forwarded to ``config.data``.
        validPath: Forwarded to ``config.data``.
        namesPath: Forwarded to ``config.data``.
        batchs: Forwarded to ``config.cfg``.

    Returns:
        None. The two files are the only output.
    """
    dataString = config.data(classes, trainPath, validPath, namesPath)
    # Use context managers so each handle is flushed and closed
    # deterministically, even if write() raises, instead of relying on
    # garbage collection of the anonymous file object.
    with open(join(wd, sets, 'voc.data'), 'w') as dataFile:
        dataFile.write(dataString)

    cfgString = config.cfg(classes, batchs)
    with open(join(wd, sets, 'voc.cfg'), 'w') as cfgFile:
        cfgFile.write(cfgString)