示例#1
0
def test_iob_to_biluo():
    """Check IOB -> BILUO conversion of a valid sequence and rejection of a bad one."""
    # A well-formed IOB sequence and the BILUO sequence it must map to:
    # the multi-token LOC span ends in "L-", the single-token PERSON is "U-".
    iob_tags = ["O", "O", "B-LOC", "I-LOC", "O", "B-PERSON"]
    expected_biluo = ["O", "O", "B-LOC", "L-LOC", "O", "U-PERSON"]
    assert iob_to_biluo(iob_tags) == expected_biluo

    # A sequence containing a bare '"' entry is expected to be rejected.
    malformed_iob = ["O", "O", '"', "B-LOC", "I-LOC"]
    with pytest.raises(ValueError):
        iob_to_biluo(malformed_iob)
def test_issue2385():
    """Regression test: IOB tag sequences are converted to the expected BILUO tags."""
    # Each case pairs an input tag sequence with the BILUO output it must yield.
    cases = [
        # Labels that themselves contain a 'b' character must not confuse the
        # conversion.
        (
            ("B-BRAWLER", "I-BRAWLER", "I-BRAWLER"),
            ["B-BRAWLER", "I-BRAWLER", "L-BRAWLER"],
        ),
        # IOB1 format stays supported (an entity may open with an "I-" tag).
        (
            ("I-ORG", "I-ORG", "B-ORG"),
            ["B-ORG", "L-ORG", "U-ORG"],
        ),
        # IOB2 format stays supported (every entity opens with a "B-" tag).
        (
            ("B-PERSON", "I-PERSON", "B-PERSON"),
            ["B-PERSON", "L-PERSON", "U-PERSON"],
        ),
    ]
    for iob_tags, expected_biluo in cases:
        assert iob_to_biluo(iob_tags) == expected_biluo
示例#3
0
    def from_spacy_doc(cls,
                       doc,
                       map_spacy_entities_to_presidio=True,
                       scheme="BILUO"):
        """Build an instance of ``cls`` from an entity-annotated document.

        :param doc: Document object exposing ``ents``, ``text`` and iterable
            tokens with ``ent_iob_`` / ``ent_type_`` attributes
            (presumably a spaCy ``Doc`` -- confirm against callers).
        :param map_spacy_entities_to_presidio: When true, each entity label is
            passed through ``cls.rename_from_spacy_tags`` before being stored
            on its span; otherwise the label is used unchanged.
        :param scheme: Tagging scheme for the token tags; one of
            "BILUO", "BILOU", "BIO" or "IOB".
        :return: ``cls(...)`` built with the document text, the entity spans,
            the document itself as ``tokens`` and the per-token tags.
        :raises ValueError: If ``scheme`` is not one of the supported values.
        """
        supported_schemes = ("BILUO", "BILOU", "BIO", "IOB")
        if scheme not in supported_schemes:
            raise ValueError(
                "scheme should be one of \"BILUO\",\"BILOU\",\"BIO\",\"IOB\"")

        def resolve_label(label):
            # Only the span labels are renamed; token tags below keep the
            # raw ``ent_type_`` value.
            if map_spacy_entities_to_presidio:
                return cls.rename_from_spacy_tags(label)
            return label

        # One span per entity found in the document.
        spans = [
            Span(
                entity_type=resolve_label(entity.label_),
                entity_value=entity.text,
                start_position=entity.start_char,
                end_position=entity.end_char,
            )
            for entity in doc.ents
        ]

        # One tag per token: "O" stays as is, anything else becomes
        # "<iob code>-<entity type>".
        tags = []
        for token in doc:
            iob_code = token.ent_iob_
            if iob_code == "O":
                tags.append("O")
            else:
                tags.append(f"{iob_code}-{token.ent_type_}")

        # The per-token tags are converted only for the BILUO/BILOU schemes;
        # for "BIO"/"IOB" they are passed on as built above.
        wants_biluo = scheme in ("BILUO", "BILOU")
        if wants_biluo:
            tags = iob_to_biluo(tags)

        return cls(
            full_text=doc.text,
            masked=None,
            spans=spans,
            tokens=doc,
            tags=tags,
            create_tags_from_span=False,
            scheme=scheme,
        )
    def from_spacy_doc(cls,
                       doc: Doc,
                       translate_tags: bool = True,
                       scheme: str = "BILUO") -> "InputSample":
        """Create a sample from an entity-annotated ``Doc``.

        :param doc: Document whose ``ents`` become spans and whose tokens
            supply the per-token tags.
        :param translate_tags: If true, entity labels are mapped through
            ``cls.rename_from_spacy_tag`` before being stored on the spans;
            if false, labels are kept as found on the entities.
        :param scheme: Tagging scheme for the token tags; must be one of
            "BILUO", "BILOU", "BIO" or "IOB".
        :return: The result of calling ``cls`` with the document text, spans,
            tokens (the document itself) and tags.
        :raises ValueError: If ``scheme`` is not a supported value.
        """
        allowed_schemes = ("BILUO", "BILOU", "BIO", "IOB")
        if scheme not in allowed_schemes:
            raise ValueError(
                'scheme should be one of "BILUO","BILOU","BIO","IOB"')

        def to_span(entity):
            # Translate the label only when requested; the character offsets
            # and surface text are copied straight from the entity.
            if translate_tags:
                label = cls.rename_from_spacy_tag(entity.label_)
            else:
                label = entity.label_
            return Span(
                entity_type=label,
                entity_value=entity.text,
                start_position=entity.start_char,
                end_position=entity.end_char,
            )

        spans = [to_span(entity) for entity in doc.ents]

        def to_tag(token):
            # "O" tokens carry no entity type; every other IOB code is
            # suffixed with the token's (untranslated) entity type.
            code = token.ent_iob_
            return "O" if code == "O" else f"{code}-{token.ent_type_}"

        tags = [to_tag(token) for token in doc]

        # Only the BILUO/BILOU schemes need conversion of the tags built above.
        if scheme == "BILUO" or scheme == "BILOU":
            tags = iob_to_biluo(tags)

        sample_kwargs = dict(
            full_text=doc.text,
            masked=None,
            spans=spans,
            tokens=doc,
            tags=tags,
            create_tags_from_span=False,
            scheme=scheme,
        )
        return cls(**sample_kwargs)
def test_issue2385_biluo(tags):
    """Check that tags already in BILUO form pass through the conversion unchanged."""
    converted = iob_to_biluo(tags)
    # The converter's result is compared against a list copy of the input.
    unchanged = list(tags)
    assert converted == unchanged