Exemplo n.º 1
0
def test_Example_init_requires_doc_objects():
    vocab = Vocab()
    with pytest.raises(TypeError):
        Example(None, None)
    with pytest.raises(TypeError):
        Example(Doc(vocab, words=["hi"]), None)
    with pytest.raises(TypeError):
        Example(None, Doc(vocab, words=["hi"]))
Exemplo n.º 2
0
def test_Example_aligned_whitespace(en_vocab):
    words = ["a", " ", "b"]
    tags = ["A", "SPACE", "B"]
    predicted = Doc(en_vocab, words=words)
    reference = Doc(en_vocab, words=words, tags=tags)

    example = Example(predicted, reference)
    assert example.get_aligned("TAG", as_string=True) == tags
Exemplo n.º 3
0
def main(trained_pipeline: Path, test_data: Path, print_details: bool):
    nlp = spacy.load(trained_pipeline)

    doc_bin = DocBin(store_user_data=True).from_disk(test_data)
    docs = doc_bin.get_docs(nlp.vocab)
    examples = []
    for gold in docs:
        pred = Doc(
            nlp.vocab,
            words=[t.text for t in gold],
            spaces=[t.whitespace_ for t in gold],
        )
        pred.ents = gold.ents
        for name, proc in nlp.pipeline:
            pred = proc(pred)
        examples.append(Example(pred, gold))

        # Print the gold and prediction, if gold label is not 0
        if print_details:
            print()
            print(f"Text: {gold.text}")
            print(f"spans: {[(e.start, e.text, e.label_) for e in pred.ents]}")
            for value, rel_dict in pred._.rel.items():
                gold_labels = [
                    k for (k, v) in gold._.rel[value].items() if v == 1.0
                ]
                if gold_labels:
                    print(
                        f" pair: {value} --> gold labels: {gold_labels} --> predicted values: {rel_dict}"
                    )
            print()

    random_examples = []
    docs = doc_bin.get_docs(nlp.vocab)
    for gold in docs:
        pred = Doc(
            nlp.vocab,
            words=[t.text for t in gold],
            spaces=[t.whitespace_ for t in gold],
        )
        pred.ents = gold.ents
        relation_extractor = nlp.get_pipe("relation_extractor")
        get_instances = relation_extractor.model.attrs["get_instances"]
        for (e1, e2) in get_instances(pred):
            offset = (e1.start, e2.start)
            if offset not in pred._.rel:
                pred._.rel[offset] = {}
            for label in relation_extractor.labels:
                pred._.rel[offset][label] = random.uniform(0, 1)
        random_examples.append(Example(pred, gold))

    thresholds = [
        0.000, 0.050, 0.100, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99,
        0.999
    ]
    print()
    print("Random baseline:")
    _score_and_format(random_examples, thresholds)

    print()
    print("Results of the trained model:")
    _score_and_format(examples, thresholds)