Exemplo n.º 1
0
def gold_evaluation(sentences, extractor, gazetteer, model_cls, model_args):
    """Train on the non-gold sentences and log the accuracy on the gold ones.

    Sentences without a truthy ``"gold_fes"`` entry are used as training
    data; sentences with one are used as the evaluation set. A prediction is
    counted as correct when it is one of the label indices listed as
    acceptable for that chunk in ``gold_fes``.

    Args:
        sentences: Re-iterable collection of dicts (it is traversed twice,
            so a one-shot generator will not work). Each dict is read for
            the keys ``"sentence"``, ``"lu"`` and ``"fes"``, plus the
            optional ``"gold_fes"``, a mapping from chunk to an iterable of
            acceptable labels (a falsy label is treated as ``"O"``).
        extractor: Feature extractor providing ``process_sentence``,
            ``get_features``, ``start``, ``label_index`` and ``lu_column``.
        gazetteer: Passed through unchanged to ``extractor.process_sentence``.
        model_cls: Forwarded to ``FeatureSelectedClassifier``.
        model_args: Forwarded to ``FeatureSelectedClassifier``.

    Returns:
        None. The accuracy is only logged. Returns early, with a warning,
        when there are no gold sentences or no gold samples to score.
    """
    logger.info("Evaluating on the gold sentences")

    # Training pass: feed every sentence that has no gold annotation.
    for each in sentences:
        if not each.get("gold_fes"):
            extractor.process_sentence(each["sentence"], each["lu"], each["fes"], add_unknown=True, gazetteer=gazetteer)
    x_tr, y_tr = extractor.get_features(refit=True)

    # NOTE(review): presumably start() discards the samples accumulated above
    # so that the next get_features() call only covers gold sentences -- confirm.
    extractor.start()
    tagged_gold = []
    for each in sentences:
        if each.get("gold_fes"):
            tagged = extractor.process_sentence(
                each["sentence"], each["lu"], each["fes"], add_unknown=False, gazetteer=gazetteer
            )
            tagged_gold.append((each["gold_fes"], tagged))

    if not tagged_gold:
        # Logger.warn is a deprecated alias; Logger.warning is the supported name.
        logger.warning("asked to evaluate gold, but no gold sentences found")
        return

    x_gold, _ = extractor.get_features(refit=False)

    # One entry per sample chunk: the list of label indices accepted as correct.
    # Chunks missing from gold_fes get an empty list, so they can never match.
    y_gold = []
    for gold_fes, tagged in tagged_gold:
        for chunk, is_sample in tagged:
            if is_sample:
                y_gold.append([extractor.label_index[fe or "O"] for fe in gold_fes.get(chunk, [])])

    # Internal sanity check: gold labels and gold feature rows must line up.
    assert len(y_gold) == x_gold.shape[0]

    if not y_gold:
        # Nothing to score; also avoids a division by zero in the accuracy below.
        logger.warning("asked to evaluate gold, but no gold samples found")
        return

    model = FeatureSelectedClassifier(model_cls, extractor.lu_column(), model_args)
    model.fit(x_tr, y_tr)
    y_pred = model.predict(x_gold)

    correct = sum(1 for actual, predicted in zip(y_gold, y_pred) if predicted in actual)
    logger.info("Gold accuracy: %f (%d / %d roles)", float(correct) / len(y_gold), correct, len(y_gold))
Exemplo n.º 2
0
def gold_evaluation(sentences, extractor, gazetteer, model_cls, model_args):
    """Fit a classifier on non-gold sentences and log its gold accuracy.

    A sentence is treated as gold when its ``'gold_fes'`` entry is present
    and truthy; every other sentence is training data. Each gold sample is
    scored as correct when the predicted label index appears among the
    label indices that ``gold_fes`` lists for its chunk.

    Args:
        sentences: Collection of dicts that can be iterated more than once
            (two passes are made over it). The keys ``'sentence'``, ``'lu'``
            and ``'fes'`` are read from every dict; ``'gold_fes'`` is
            optional and maps a chunk to an iterable of acceptable labels,
            where a falsy label stands for ``'O'``.
        extractor: Object exposing ``process_sentence``, ``get_features``,
            ``start``, ``label_index`` and ``lu_column``.
        gazetteer: Handed to ``extractor.process_sentence`` as is.
        model_cls: Handed to ``FeatureSelectedClassifier``.
        model_args: Handed to ``FeatureSelectedClassifier``.

    Returns:
        None. Results are reported through the logger only. The function
        returns early after a warning if no gold sentences, or no gold
        samples, are available.
    """
    logger.info('Evaluating on the gold sentences')

    # First pass: collect training samples from the non-gold sentences.
    for each in sentences:
        if not each.get('gold_fes'):
            extractor.process_sentence(each['sentence'], each['lu'], each['fes'],
                                       add_unknown=True, gazetteer=gazetteer)
    x_tr, y_tr = extractor.get_features(refit=True)

    # NOTE(review): start() presumably resets the extractor's sample buffer
    # before the gold pass -- verify against the extractor implementation.
    extractor.start()
    tagged_gold = []
    for each in sentences:
        if each.get('gold_fes'):
            tagged_gold.append((each['gold_fes'], extractor.process_sentence(
                each['sentence'], each['lu'], each['fes'],
                add_unknown=False, gazetteer=gazetteer
            )))

    if not tagged_gold:
        # Logger.warning replaces the deprecated Logger.warn alias.
        logger.warning('asked to evaluate gold, but no gold sentences found')
        return

    x_gold, _ = extractor.get_features(refit=False)

    # Acceptable label indices per sample chunk; a chunk absent from gold_fes
    # yields an empty list and therefore always counts as a miss.
    y_gold = []
    for gold_fes, tagged in tagged_gold:
        for chunk, is_sample in tagged:
            if is_sample:
                y_gold.append([extractor.label_index[fe or 'O'] for fe in gold_fes.get(chunk, [])])

    # Internal invariant: one gold label list per gold feature row.
    assert len(y_gold) == x_gold.shape[0]

    if not y_gold:
        # No samples to score; bail out instead of dividing by zero below.
        logger.warning('asked to evaluate gold, but no gold samples found')
        return

    model = FeatureSelectedClassifier(model_cls, extractor.lu_column(), model_args)
    model.fit(x_tr, y_tr)
    y_pred = model.predict(x_gold)

    correct = sum(1 for actual, predicted in zip(y_gold, y_pred) if predicted in actual)
    logger.info('Gold accuracy: %f (%d / %d roles)', float(correct) / len(y_gold),
                correct, len(y_gold))