Python Cas.Cas示例，cassis.cas.Cas.Cas Python示例

示例#1

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_default_typesystem_is_not_shared():
    """Create the same type name in the typesystems of two default CASes.

    Regression test for https://github.com/dkpro/dkpro-cassis/issues/67.
    There are no assertions; the test passes as long as neither
    ``create_type`` call raises.
    """
    first_cas, second_cas = Cas(), Cas()

    first_type = first_cas.typesystem.create_type(name="test.Type")
    second_type = second_cas.typesystem.create_type(name="test.Type")

示例#2

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_get_view_throws_if_view_does_not_exist():
    """Asking a fresh CAS for the view ``testView`` must raise ``KeyError``.

    The exception text is checked as well, not only the exception type.
    """
    cas = Cas()

    # ``match`` runs a regex search against the exception text, so the
    # literal square brackets around the view name have to be escaped.
    with pytest.raises(
            KeyError,
            match=r"There is no view with name \[testView\] in this CAS!"):
        cas.get_view("testView")

示例#3

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_select_covering_also_returns_parent_instances(small_typesystem_xml,
                                                       tokens, sentences):
    """Check ``select_covering`` for ``cassis.Sentence`` with a subtype present.

    A ``cassis.SubSentence`` type (supertype ``cassis.Sentence``) is created
    and one instance is placed over each sentence. For every token the
    covering set must then hold both the sentence and its sub-sentence.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    SubSentenceType = typesystem.create_type("cassis.SubSentence",
                                             supertypeName="cassis.Sentence")

    cas = Cas(typesystem=typesystem)

    first_sentence, second_sentence = sentences
    # One sub-sentence per sentence, spanning exactly the same offsets.
    subsentence1, subsentence2 = (
        SubSentenceType(begin=sentence.begin, end=sentence.end)
        for sentence in (first_sentence, second_sentence)
    )
    cas.add_annotations(tokens + sentences + [subsentence1, subsentence2])

    # The first six tokens belong to the first sentence, the rest to the second.
    expectations = (
        (tokens[:6], (first_sentence, subsentence1)),
        (tokens[6:], (second_sentence, subsentence2)),
    )
    for sentence_tokens, expected_members in expectations:
        for token in sentence_tokens:
            covering = set(cas.select_covering("cassis.Sentence", token))

            assert covering == set(expected_members)

示例#4

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_select_covered_also_returns_parent_instances(small_typesystem_xml,
                                                      tokens, sentences):
    """Check ``select_covered`` for ``cassis.Token`` with a subtype present.

    A ``cassis.SubToken`` type (supertype ``cassis.Token``) is created and one
    instance is added inside each sentence. Selecting tokens covered by a
    sentence must return the regular tokens plus that sentence's sub-token.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    SubTokenType = typesystem.create_type("cassis.SubToken",
                                          supertypeName="cassis.Token")

    # The first sub-token spans tokens 2..3, the second exactly token 8.
    subtoken1 = SubTokenType(begin=tokens[2].begin, end=tokens[3].end)
    subtoken2 = SubTokenType(begin=tokens[8].begin, end=tokens[8].end)

    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens + sentences + [subtoken1, subtoken2])

    first_sentence, second_sentence = sentences

    covered_by_first = set(cas.select_covered("cassis.Token", first_sentence))
    covered_by_second = set(cas.select_covered("cassis.Token", second_sentence))

    assert covered_by_first == set(tokens[:6]) | {subtoken1}
    assert covered_by_second == set(tokens[6:]) | {subtoken2}

示例#5

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_create_view_throws_if_view_already_exists():
    """Creating the view ``testView`` a second time must raise ``ValueError``.

    The exception text is checked as well, not only the exception type.
    """
    cas = Cas()
    cas.create_view("testView")

    # ``match`` runs a regex search against the exception text, so the
    # literal square brackets around the view name have to be escaped.
    with pytest.raises(ValueError,
                       match=r"A view with name \[testView\] already exists!"):
        cas.create_view("testView")

示例#6

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_initial_view_is_created():
    """A newly constructed CAS can hand out the ``_InitialView`` view.

    The view's sofa must validate with ``attr.validate`` and carry the
    matching ``sofaID``.
    """
    cas = Cas()

    initial_sofa = cas.get_view("_InitialView").get_sofa()

    attr.validate(initial_sofa)
    assert initial_sofa.sofaID == "_InitialView"

示例#7

0

显示文件

def test_FeatureStructure_get_covered_text_tokens(tokens):
    """Each token's ``get_covered_text()`` must return its surface form."""
    cas = Cas()
    cas.sofa_string = "Joe waited for the train . The train was late ."

    covered = []
    for token in tokens:
        covered.append(token.get_covered_text())

    assert covered == [
        "Joe", "waited", "for", "the", "train", ".",
        "The", "train", "was", "late", ".",
    ]

示例#8

0

显示文件

def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """Selecting ``uima.tcas.Annotation`` must return every added token and sentence."""
    expected = tokens + sentences
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(expected)

    selected = set(cas.select("uima.tcas.Annotation"))

    # Compared as sets, so only membership matters here, not order.
    assert selected == set(expected)

示例#9

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_get_covered_text_sentences(sentences):
    """``Cas.get_covered_text`` must return the text of each sentence."""
    cas = Cas()
    cas.sofa_string = "Joe waited for the train . The train was late ."

    covered = list(map(cas.get_covered_text, sentences))

    assert covered == ["Joe waited for the train .", "The train was late ."]

示例#10

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml):
    """Add a token to a ``Cas(lenient=True)`` that was not given the loaded typesystem.

    There are no assertions; the test passes as long as ``add_annotation``
    does not raise.
    """
    token_type = load_typesystem(small_typesystem_xml).get_type("cassis.Token")
    token = token_type(begin=0, end=3, id="0", pos="NNP")

    lenient_cas = Cas(lenient=True)
    lenient_cas.add_annotation(token)

示例#11

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_create_view_creates_view():
    """``create_view`` must return a view whose sofa validates and has the given ID."""
    cas = Cas()

    created_sofa = cas.create_view("testView").get_sofa()

    attr.validate(created_sofa)
    assert created_sofa.sofaID == "testView"

示例#12

0

显示文件

    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem):
        """Build a ``Cas`` from the XMI document in ``source``.

        The document is streamed with ``etree.iterparse``. Sofa and view
        elements are collected separately; every element that is not the
        XMI root, a ``NULL`` element, a sofa or a view is parsed as an
        annotation. After parsing, one view per sofa is set up on a new
        ``Cas`` and the annotations listed as members of that view are added
        to it.

        Args:
            source: Passed unchanged to ``etree.iterparse``.
            typesystem: Handed to ``_parse_annotation`` for every annotation
                element.

        Returns:
            The newly created ``Cas`` with its views populated.

        Raises:
            RuntimeError: If the number of parsed sofas and views differs.
        """
        # XML namespaces (in ElementTree "{uri}" notation) and the qualified
        # tag names built from them
        NS_XMI = "{http://www.omg.org/XMI}"
        NS_CAS = "{http:///uima/cas.ecore}"

        TAG_XMI = NS_XMI + "XMI"
        TAG_CAS_NULL = NS_CAS + "NULL"
        TAG_CAS_SOFA = NS_CAS + "Sofa"
        TAG_CAS_VIEW = NS_CAS + "View"

        # Parsed sofas in document order, proto views keyed by their ``sofa``
        # value, and annotations keyed by their ``xmiID``
        sofas = []
        views = {}
        annotations = {}

        # Only "end" events are requested, so each element is complete when
        # it is handed to the loop body
        context = etree.iterparse(source, events=("end",))

        for event, elem in context:
            assert event == "end"

            if elem.tag == TAG_XMI:
                # Ignore the closing 'xmi:XMI' tag
                pass
            elif elem.tag == TAG_CAS_NULL:
                # NULL elements carry nothing that is used here
                pass
            elif elem.tag == TAG_CAS_SOFA:
                sofa = self._parse_sofa(elem)
                sofas.append(sofa)
            elif elem.tag == TAG_CAS_VIEW:
                proto_view = self._parse_view(elem)
                views[proto_view.sofa] = proto_view
            else:
                # Anything else is treated as an annotation element
                annotation = self._parse_annotation(typesystem, elem)
                annotations[annotation.xmiID] = annotation

            # Free already processed elements from memory
            self._clear_elem(elem)

        if len(sofas) != len(views):
            raise RuntimeError("Number of views and sofas is not equal!")

        # NOTE(review): the new Cas is created without ``typesystem``;
        # confirm that this is intended.
        cas = Cas()
        for sofa in sofas:
            # The proto view belonging to a sofa is found via the sofa's xmiID
            proto_view = views[sofa.xmiID]

            # "_InitialView" is fetched from the new Cas instead of created;
            # every other view is created under its sofaID
            if sofa.sofaID == "_InitialView":
                view = cas.get_view("_InitialView")
            else:
                view = cas.create_view(sofa.sofaID)

            view.sofa_string = sofa.sofaString
            view.sofa_mime = sofa.mimeType

            # Resolve the member ids of the view to the parsed annotations
            for member_id in proto_view.members:
                annotation = annotations[member_id]

                view.add_annotation(annotation)

        return cas

示例#13

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_select(small_typesystem_xml, tokens, sentences):
    """``select`` by type name must return exactly the tokens / sentences added."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens + sentences)

    selected_tokens = list(cas.select("cassis.Token"))
    selected_sentences = list(cas.select("cassis.Sentence"))

    # List comparison, so the order of the results is checked as well.
    assert selected_tokens == tokens
    assert selected_sentences == sentences

示例#14

0

显示文件

def test_leniency_type_not_in_typeystem_not_lenient(small_typesystem_xml):
    """Adding a token to a default ``Cas()`` must raise ``RuntimeError``.

    The token type comes from the loaded typesystem, which is not handed to
    the CAS.
    """
    token_type = load_typesystem(small_typesystem_xml).get_type("cassis.Token")
    token = token_type(begin=0, end=3, id="0", pos="NNP")

    strict_cas = Cas()
    expected_error = "Typesystem of CAS does not contain type"
    with pytest.raises(RuntimeError, match=expected_error):
        strict_cas.add_annotation(token)

示例#15

0

显示文件

def test_select(tokens, sentences):
    """``select`` on a CAS built with ``annotations=`` returns them per type name."""
    cas = Cas(annotations=tokens + sentences)

    selected_tokens = list(cas.select('cassis.Token'))
    selected_sentences = list(cas.select('cassis.Sentence'))

    # List comparison, so the order of the results is checked as well.
    assert selected_tokens == tokens
    assert selected_sentences == sentences

示例#16

0

显示文件

def test_get_covered_text_sentences(sentences):
    """``get_covered_text`` must return each sentence's text from the given sofa."""
    sofa = Sofa(
        sofaNum=1,
        sofaString='Joe waited for the train . The train was late .',
    )
    cas = Cas(annotations=sentences, sofas=[sofa])

    covered = []
    for sentence in sentences:
        covered.append(cas.get_covered_text(sentence))

    assert covered == ['Joe waited for the train .', 'The train was late .']

示例#17

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_select(tokens, sentences):
    """``select`` by type name must return the added tokens / sentences in order."""
    cas = Cas()
    cas.add_annotations(tokens + sentences)

    selected = {
        type_name: list(cas.select(type_name))
        for type_name in ("cassis.Token", "cassis.Sentence")
    }

    assert selected["cassis.Token"] == tokens
    assert selected["cassis.Sentence"] == sentences

示例#18

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_removing_throws_if_fs_in_other_view(small_typesystem_xml, tokens,
                                             sentences):
    """Removing a token from a view it was never added to must raise ``ValueError``.

    The tokens are added through ``cas`` only; the removal is attempted on
    the separately created ``testView``.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens)

    other_view = cas.create_view("testView")
    first_token = tokens[0]

    with pytest.raises(ValueError):
        other_view.remove_annotation(first_token)

示例#19

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_annotations_are_ordered_correctly(tokens):
    """Tokens added in random order must be selected in ``tokens`` fixture order."""
    annotations = list(tokens)
    # Shuffle the copy itself. Shuffling ``list(annotations)`` would only
    # reorder a temporary list and leave the insertion order unchanged.
    random.shuffle(annotations)
    cas = Cas()
    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens

示例#20

0

显示文件

def test_select_only_returns_annotations_of_current_view(tokens, sentences, small_typesystem_xml):
    """``select_all`` on a view must only return what was added through that view.

    Tokens are added through ``cas``, sentences through ``testView``; the
    test expects the tokens under ``_InitialView`` and the sentences under
    ``testView``.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens)
    test_view = cas.create_view("testView")
    test_view.add_annotations(sentences)

    # Both views are looked up again by name rather than reusing ``test_view``.
    in_initial_view = list(cas.get_view("_InitialView").select_all())
    in_test_view = list(cas.get_view("testView").select_all())

    assert tokens == in_initial_view
    assert sentences == in_test_view

示例#21

0

显示文件

def test_select_covered(small_typesystem_xml, tokens, sentences):
    """``select_covered`` must return each sentence's tokens, in order.

    The first six tokens are expected under the first sentence and the
    remaining ones under the second.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens + sentences)
    first_sentence, second_sentence = sentences
    expected_first, expected_second = tokens[:6], tokens[6:]

    covered_first = list(cas.select_covered("cassis.Token", first_sentence))
    covered_second = list(cas.select_covered("cassis.Token", second_sentence))

    assert covered_first == expected_first
    assert covered_second == expected_second

示例#22

0

显示文件

def test_get_covered_text_tokens(tokens):
    """``get_covered_text`` must return each token's text from the given sofa."""
    sofa = Sofa(
        sofaNum=1,
        sofaString='Joe waited for the train . The train was late .',
    )
    cas = Cas(annotations=tokens, sofas=[sofa])

    covered = list(map(cas.get_covered_text, tokens))

    assert covered == [
        'Joe', 'waited', 'for', 'the', 'train', '.',
        'The', 'train', 'was', 'late', '.',
    ]

示例#23

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_annotations_are_ordered_correctly(small_typesystem_xml, tokens):
    """Tokens added in random order must be selected in ``tokens`` fixture order."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)

    annotations = list(tokens)
    # Shuffle the copy itself. Shuffling ``list(annotations)`` would only
    # reorder a temporary list and leave the insertion order unchanged.
    random.shuffle(annotations)

    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens

示例#24

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_get_view_finds_existing_view():
    """``get_view`` must return a previously created view with its own sofa text."""
    cas = Cas()
    cas.create_view("testView")
    cas.sofa_string = "Initial"

    test_view = cas.get_view("testView")
    test_view.sofa_string = "testView42"

    test_sofa = test_view.get_sofa()
    attr.validate(test_sofa)
    assert test_sofa.sofaID == "testView"
    # The text set through ``cas`` and the text set through the view must
    # not overwrite each other.
    assert cas.sofa_string == "Initial"
    assert test_view.sofa_string == "testView42"

示例#25

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences):
    """Removing annotations through ``cas`` must leave ``testView`` untouched.

    The same annotations are added through ``cas`` and through ``testView``;
    after removing them via ``cas`` only the view must still contain them.
    """
    everything = tokens + sentences
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    test_view = cas.create_view("testView")

    for target in (cas, test_view):
        target.add_annotations(everything)

    for annotation in everything:
        cas.remove_annotation(annotation)

    remaining_in_cas = set(cas.select("uima.tcas.Annotation"))
    assert remaining_in_cas == set()
    remaining_in_view = set(test_view.select("uima.tcas.Annotation"))
    assert remaining_in_view == set(everything)

示例#26

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_select_covered(tokens, sentences):
    """``select_covered`` must return each sentence's tokens, in order.

    The first six tokens are expected under the first sentence and the
    remaining ones under the second.
    """
    cas = Cas()
    cas.add_annotations(tokens + sentences)
    first_sentence, second_sentence = sentences

    covered_first, covered_second = [
        list(cas.select_covered("cassis.Token", sentence))
        for sentence in (first_sentence, second_sentence)
    ]

    assert covered_first == tokens[:6]
    assert covered_second == tokens[6:]

示例#27

0

显示文件

def test_select_covered(tokens, sentences):
    """``select_covered`` on a CAS built with ``annotations=`` returns tokens per sentence.

    The first six tokens are expected under the first sentence and the
    remaining ones under the second.
    """
    cas = Cas(annotations=tokens + sentences)
    first_sentence, second_sentence = sentences
    expected_first, expected_second = tokens[:6], tokens[6:]

    covered_first = list(cas.select_covered('cassis.Token', first_sentence))
    covered_second = list(cas.select_covered('cassis.Token', second_sentence))

    assert covered_first == expected_first
    assert covered_second == expected_second

示例#28

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_removing_of_existing_fs_works(small_typesystem_xml, tokens,
                                       sentences):
    """Removing annotations must make them disappear from ``select``.

    Tokens are removed first, leaving only the sentences; the sentences are
    removed next, leaving nothing.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    cas.add_annotations(tokens + sentences)

    def remaining():
        # Everything still selectable as ``uima.tcas.Annotation``.
        return set(list(cas.select("uima.tcas.Annotation")))

    for token in tokens:
        cas.remove_annotation(token)
    assert remaining() == set(sentences)

    for sentence in sentences:
        cas.remove_annotation(sentence)
    assert remaining() == set()

示例#29

0

显示文件

文件： test_cas.py 项目： hatzel/dkpro-cassis

def test_select_covered_overlapping(small_typesystem_xml, tokens, sentences):
    """``select_covered`` must return both of two annotations with equal offsets.

    Two ``test.Annotation`` instances spanning 0..5 are added under a
    sentence spanning 0..10, and both are expected back in insertion order.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)

    AnnotationType = cas.typesystem.create_type("test.Annotation")
    SentenceType = cas.typesystem.get_type("cassis.Sentence")
    sentence = SentenceType(begin=0, end=10)
    # Two distinct instances with identical begin/end offsets.
    annotations = [AnnotationType(begin=0, end=5) for _ in range(2)]

    cas.add_annotation(sentence)
    cas.add_annotations(annotations)

    covered = list(cas.select_covered("test.Annotation", sentence))

    assert covered == annotations

示例#30

0

显示文件

文件： test_cas.py 项目： malteos/dkpro-cassis

def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """Every token added via ``add_annotation`` must end up with an ``xmiID``.

    The ``tokens`` fixture is requested by the signature but not used; the
    test builds its own tokens so that they start without having been added
    to any CAS.
    """
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)
    TokenType = typesystem.get_type("cassis.Token")

    # A separate name keeps the fixture argument from being shadowed.
    new_tokens = [
        TokenType(begin=0, end=3, id="0", pos="NNP"),
        TokenType(begin=4, end=10, id="1", pos="VBD"),
        TokenType(begin=11, end=14, id="2", pos="IN"),
        TokenType(begin=15, end=18, id="3", pos="DT"),
        TokenType(begin=19, end=24, id="4", pos="NN"),
        TokenType(begin=25, end=26, id="5", pos="."),
    ]
    for token in new_tokens:
        cas.add_annotation(token)

    actual_tokens = list(cas.select(TokenType.name))
    # Guard against a vacuous pass: all() over an empty selection is True.
    assert len(actual_tokens) == len(new_tokens)
    assert all(token.xmiID is not None for token in actual_tokens)