def test_default_typesystem_is_not_shared():
    """Create a type with the same name in the type systems of two default ``Cas`` objects."""
    # https://github.com/dkpro/dkpro-cassis/issues/67
    first_cas = Cas()
    second_cas = Cas()

    # The same type name is created once per CAS, each on that CAS's own
    # ``typesystem`` attribute.
    first_cas.typesystem.create_type(name="test.Type")
    second_cas.typesystem.create_type(name="test.Type")
def test_get_view_throws_if_view_does_not_exist():
    """Requesting a view name that was never created must raise ``KeyError``."""
    cas = Cas()

    # ``match`` is a regular expression searched in the exception text, so the
    # literal square brackets are escaped.  The former ``message=`` keyword did
    # not check the exception text and is no longer accepted by pytest.
    with pytest.raises(KeyError, match=r"There is no view with name \[testView\] in this CAS!"):
        cas.get_view("testView")
def test_select_covering_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """Check that ``select_covering`` for ``cassis.Sentence`` also yields subtype instances.

    A ``cassis.SubSentence`` type with supertype ``cassis.Sentence`` is created
    and one instance is placed over the span of each sentence.  Every token is
    then expected to be covered by its sentence and the matching sub-sentence.
    """
    ts = load_typesystem(small_typesystem_xml)
    SubSentence = ts.create_type("cassis.SubSentence", supertypeName="cassis.Sentence")
    cas = Cas(typesystem=ts)

    sentence_a, sentence_b = sentences
    sub_a = SubSentence(begin=sentence_a.begin, end=sentence_a.end)
    sub_b = SubSentence(begin=sentence_b.begin, end=sentence_b.end)
    cas.add_annotations(tokens + sentences + [sub_a, sub_b])

    # The first six tokens are checked against the first sentence, the rest
    # against the second one.
    expectations = (
        (tokens[:6], {sentence_a, sub_a}),
        (tokens[6:], {sentence_b, sub_b}),
    )
    for covered_tokens, expected in expectations:
        for token in covered_tokens:
            found = set(cas.select_covering("cassis.Sentence", token))
            assert found == expected
def test_select_covered_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """Check that ``select_covered`` for ``cassis.Token`` also yields subtype instances.

    A ``cassis.SubToken`` type with supertype ``cassis.Token`` is created; one
    instance spans tokens 2..3 and another spans token 8.  Each sentence is
    expected to cover its tokens plus the sub-token lying inside it.
    """
    ts = load_typesystem(small_typesystem_xml)
    SubToken = ts.create_type("cassis.SubToken", supertypeName="cassis.Token")

    sub_in_first = SubToken(begin=tokens[2].begin, end=tokens[3].end)
    sub_in_second = SubToken(begin=tokens[8].begin, end=tokens[8].end)

    cas = Cas(typesystem=ts)
    cas.add_annotations(tokens + sentences + [sub_in_first, sub_in_second])

    sentence_a, sentence_b = sentences
    covered_a = list(cas.select_covered("cassis.Token", sentence_a))
    covered_b = list(cas.select_covered("cassis.Token", sentence_b))

    # Compared as sets, so the order of the returned annotations is irrelevant.
    assert set(covered_a) == set(tokens[:6] + [sub_in_first])
    assert set(covered_b) == set(tokens[6:] + [sub_in_second])
def test_create_view_throws_if_view_already_exists():
    """Creating a view under a name that is already taken must raise ``ValueError``."""
    cas = Cas()
    cas.create_view("testView")

    # ``match`` is a regular expression searched in the exception text, so the
    # literal square brackets are escaped.  The former ``message=`` keyword did
    # not check the exception text and is no longer accepted by pytest.
    with pytest.raises(ValueError, match=r"A view with name \[testView\] already exists!"):
        cas.create_view("testView")
def test_initial_view_is_created():
    """A fresh ``Cas`` must expose a view named ``_InitialView`` with a valid sofa."""
    initial_sofa = Cas().get_view("_InitialView").get_sofa()

    attr.validate(initial_sofa)
    assert initial_sofa.sofaID == "_InitialView"
def test_FeatureStructure_get_covered_text_tokens(tokens):
    """Each token's own ``get_covered_text()`` must return the word it spans."""
    cas = Cas()
    cas.sofa_string = "Joe waited for the train . The train was late ."
    # NOTE(review): the tokens are never added to ``cas`` in this test --
    # confirm how ``get_covered_text()`` resolves the sofa text here.

    expected_text = ["Joe", "waited", "for", "the", "train", ".", "The", "train", "was", "late", "."]
    actual_text = []
    for token in tokens:
        actual_text.append(token.get_covered_text())

    assert actual_text == expected_text
def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
    """Selecting ``uima.tcas.Annotation`` must return every added token and sentence."""
    everything = tokens + sentences
    ts = load_typesystem(small_typesystem_xml)

    cas = Cas(typesystem=ts)
    cas.add_annotations(everything)

    selected = list(cas.select("uima.tcas.Annotation"))

    # Compared as sets, so ordering is not part of this check.
    assert set(selected) == set(everything)
def test_get_covered_text_sentences(sentences):
    """``Cas.get_covered_text`` must return the sofa substring spanned by each sentence."""
    cas = Cas()
    cas.sofa_string = "Joe waited for the train . The train was late ."

    expected_text = ["Joe waited for the train .", "The train was late ."]
    actual_text = []
    for sentence in sentences:
        actual_text.append(cas.get_covered_text(sentence))

    assert actual_text == expected_text
def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml):
    """Add a ``cassis.Token`` to a ``Cas(lenient=True)`` that was built without that type system."""
    loaded_ts = load_typesystem(small_typesystem_xml)
    Token = loaded_ts.get_type("cassis.Token")
    foreign_token = Token(begin=0, end=3, id="0", pos="NNP")

    # The CAS gets its default type system, not ``loaded_ts``.
    lenient_cas = Cas(lenient=True)
    lenient_cas.add_annotation(foreign_token)
def test_create_view_creates_view():
    """``create_view`` must return a view whose sofa is valid and carries the given name."""
    new_view = Cas().create_view("testView")
    new_sofa = new_view.get_sofa()

    attr.validate(new_sofa)
    assert new_sofa.sofaID == "testView"
def deserialize(self, source: Union[IO, str], typesystem: TypeSystem):
    """Build a ``Cas`` from the XMI document readable from ``source``.

    The document is parsed incrementally.  Sofa and view elements are
    collected separately; every other element except the XMI root and the
    CAS ``NULL`` element is parsed as an annotation and indexed by its
    ``xmiID``.  Afterwards one view is set up per sofa and populated with the
    annotations listed as members of the matching view element.

    Args:
        source: Passed straight to ``etree.iterparse``.
        typesystem: Passed to ``_parse_annotation`` for every annotation element.

    Returns:
        The populated ``Cas``.

    Raises:
        RuntimeError: If the number of sofas differs from the number of views.
    """
    xmi_namespace = "{http://www.omg.org/XMI}"
    cas_namespace = "{http:///uima/cas.ecore}"

    xmi_root_tag = xmi_namespace + "XMI"
    cas_null_tag = cas_namespace + "NULL"
    cas_sofa_tag = cas_namespace + "Sofa"
    cas_view_tag = cas_namespace + "View"

    sofas = []
    views = {}
    annotations = {}

    for event, elem in etree.iterparse(source, events=("end",)):
        assert event == "end"

        tag = elem.tag
        if tag == cas_sofa_tag:
            sofas.append(self._parse_sofa(elem))
        elif tag == cas_view_tag:
            parsed_view = self._parse_view(elem)
            views[parsed_view.sofa] = parsed_view
        elif tag not in (xmi_root_tag, cas_null_tag):
            # Anything that is neither structural nor ignorable is an annotation.
            parsed_annotation = self._parse_annotation(typesystem, elem)
            annotations[parsed_annotation.xmiID] = parsed_annotation

        # Free already processed elements from memory
        self._clear_elem(elem)

    if len(sofas) != len(views):
        raise RuntimeError("Number of views and sofas is not equal!")

    # NOTE(review): ``typesystem`` is not handed to the new Cas -- confirm this is intended.
    cas = Cas()
    for sofa in sofas:
        # Views were keyed by the sofa reference they carry; look up by xmiID.
        proto_view = views[sofa.xmiID]

        is_initial = sofa.sofaID == "_InitialView"
        view = cas.get_view("_InitialView") if is_initial else cas.create_view(sofa.sofaID)

        view.sofa_string = sofa.sofaString
        view.sofa_mime = sofa.mimeType

        for member_id in proto_view.members:
            view.add_annotation(annotations[member_id])

    return cas
def test_select(small_typesystem_xml, tokens, sentences):
    """``select`` must return exactly the tokens / sentences, in fixture order, per type name."""
    ts = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=ts)
    cas.add_annotations(tokens + sentences)

    selected_tokens = list(cas.select("cassis.Token"))
    selected_sentences = list(cas.select("cassis.Sentence"))

    assert selected_tokens == tokens
    assert selected_sentences == sentences
def test_leniency_type_not_in_typeystem_not_lenient(small_typesystem_xml):
    """A default ``Cas`` must raise ``RuntimeError`` when given a ``cassis.Token`` from another type system."""
    loaded_ts = load_typesystem(small_typesystem_xml)
    Token = loaded_ts.get_type("cassis.Token")
    foreign_token = Token(begin=0, end=3, id="0", pos="NNP")

    # The CAS gets its default type system, not ``loaded_ts``.
    strict_cas = Cas()

    with pytest.raises(RuntimeError, match="Typesystem of CAS does not contain type"):
        strict_cas.add_annotation(foreign_token)
def test_select(tokens, sentences):
    """``select`` on a ``Cas`` built from an annotation list must return each type's annotations in order."""
    cas = Cas(annotations=tokens + sentences)

    selected_tokens = list(cas.select('cassis.Token'))
    selected_sentences = list(cas.select('cassis.Sentence'))

    assert selected_tokens == tokens
    assert selected_sentences == sentences
def test_get_covered_text_sentences(sentences):
    """``Cas.get_covered_text`` must return the sofa substring spanned by each sentence."""
    document_sofa = Sofa(sofaNum=1, sofaString='Joe waited for the train . The train was late .')
    cas = Cas(annotations=sentences, sofas=[document_sofa])

    expected_text = ['Joe waited for the train .', 'The train was late .']
    actual_text = []
    for sentence in sentences:
        actual_text.append(cas.get_covered_text(sentence))

    assert actual_text == expected_text
def test_select(tokens, sentences):
    """``select`` must return exactly the tokens / sentences, in fixture order, per type name."""
    cas = Cas()
    combined = tokens + sentences
    cas.add_annotations(combined)

    selected_tokens = list(cas.select("cassis.Token"))
    selected_sentences = list(cas.select("cassis.Sentence"))

    assert selected_tokens == tokens
    assert selected_sentences == sentences
def test_removing_throws_if_fs_in_other_view(small_typesystem_xml, tokens, sentences):
    """Removing an annotation via a view it was never added to must raise ``ValueError``."""
    ts = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=ts)
    cas.add_annotations(tokens)  # added through ``cas`` only, never through the new view

    other_view = cas.create_view("testView")
    first_token = tokens[0]

    with pytest.raises(ValueError):
        other_view.remove_annotation(first_token)
def test_annotations_are_ordered_correctly(tokens):
    """Tokens added in shuffled order must be selected back in fixture order."""
    annotations = list(tokens)

    # ``random.shuffle`` works in place and returns ``None``.  Shuffling a
    # temporary ``list(annotations)`` left ``annotations`` in its original
    # order, so the test never exercised out-of-order insertion.
    random.shuffle(annotations)

    cas = Cas()
    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens
def test_select_only_returns_annotations_of_current_view(tokens, sentences, small_typesystem_xml):
    """``select_all`` on a view must yield only the annotations added through that view."""
    ts = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=ts)

    # Tokens are added through ``cas``, sentences through the newly created view.
    cas.add_annotations(tokens)
    test_view = cas.create_view("testView")
    test_view.add_annotations(sentences)

    in_initial_view = list(cas.get_view("_InitialView").select_all())
    in_test_view = list(cas.get_view("testView").select_all())

    assert tokens == in_initial_view
    assert sentences == in_test_view
def test_select_covered(small_typesystem_xml, tokens, sentences):
    """``select_covered`` must return, in order, the tokens lying inside each sentence."""
    ts = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=ts)
    cas.add_annotations(tokens + sentences)

    sentence_a, sentence_b = sentences
    # The first six tokens are expected in the first sentence, the rest in the second.
    expected_a, expected_b = tokens[:6], tokens[6:]

    covered_a = list(cas.select_covered("cassis.Token", sentence_a))
    covered_b = list(cas.select_covered("cassis.Token", sentence_b))

    assert covered_a == expected_a
    assert covered_b == expected_b
def test_get_covered_text_tokens(tokens):
    """``Cas.get_covered_text`` must return the sofa substring spanned by each token."""
    document_sofa = Sofa(sofaNum=1, sofaString='Joe waited for the train . The train was late .')
    cas = Cas(annotations=tokens, sofas=[document_sofa])

    expected_text = ['Joe', 'waited', 'for', 'the', 'train', '.', 'The', 'train', 'was', 'late', '.']
    actual_text = []
    for token in tokens:
        actual_text.append(cas.get_covered_text(token))

    assert actual_text == expected_text
def test_annotations_are_ordered_correctly(small_typesystem_xml, tokens):
    """Tokens added in shuffled order must be selected back in fixture order."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)

    annotations = list(tokens)

    # ``random.shuffle`` works in place and returns ``None``.  Shuffling a
    # temporary ``list(annotations)`` left ``annotations`` in its original
    # order, so the test never exercised out-of-order insertion.
    random.shuffle(annotations)

    for token in annotations:
        cas.add_annotation(token)

    actual_tokens = list(cas.select("cassis.Token"))

    assert actual_tokens == tokens
def test_get_view_finds_existing_view():
    """``get_view`` must return the created view, whose sofa string is independent of the CAS's own."""
    cas = Cas()
    cas.create_view("testView")
    cas.sofa_string = "Initial"

    found_view = cas.get_view("testView")
    found_view.sofa_string = "testView42"

    found_sofa = found_view.get_sofa()
    attr.validate(found_sofa)

    assert found_sofa.sofaID == "testView"
    # Writing to the view's sofa string must not have touched the one set on ``cas``.
    assert cas.sofa_string == "Initial"
    assert found_view.sofa_string == "testView42"
def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences):
    """Removing annotations through ``cas`` must leave the copies added to another view untouched."""
    everything = tokens + sentences
    ts = load_typesystem(small_typesystem_xml)

    cas = Cas(typesystem=ts)
    other_view = cas.create_view("testView")

    # The same annotations are added through both ``cas`` and the extra view.
    cas.add_annotations(everything)
    other_view.add_annotations(everything)

    for fs in everything:
        cas.remove_annotation(fs)

    assert set(cas.select("uima.tcas.Annotation")) == set()
    assert set(other_view.select("uima.tcas.Annotation")) == set(everything)
def test_select_covered(tokens, sentences):
    """``select_covered`` must return, in order, the tokens lying inside each sentence."""
    cas = Cas()
    cas.add_annotations(tokens + sentences)

    sentence_a, sentence_b = sentences
    # The first six tokens are expected in the first sentence, the rest in the second.
    expected_a, expected_b = tokens[:6], tokens[6:]

    covered_a = list(cas.select_covered("cassis.Token", sentence_a))
    covered_b = list(cas.select_covered("cassis.Token", sentence_b))

    assert covered_a == expected_a
    assert covered_b == expected_b
def test_select_covered(tokens, sentences):
    """``select_covered`` on a ``Cas`` built from an annotation list must return each sentence's tokens."""
    cas = Cas(annotations=tokens + sentences)

    sentence_a, sentence_b = sentences
    # The first six tokens are expected in the first sentence, the rest in the second.
    expected_a, expected_b = tokens[:6], tokens[6:]

    covered_a = list(cas.select_covered('cassis.Token', sentence_a))
    covered_b = list(cas.select_covered('cassis.Token', sentence_b))

    assert covered_a == expected_a
    assert covered_b == expected_b
def test_removing_of_existing_fs_works(small_typesystem_xml, tokens, sentences):
    """Remove the tokens, then the sentences, checking what remains selectable after each step."""
    ts = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=ts)
    cas.add_annotations(tokens + sentences)

    # Step 1: with all tokens removed only the sentences may remain.
    for token in tokens:
        cas.remove_annotation(token)
    remaining = list(cas.select("uima.tcas.Annotation"))
    assert set(remaining) == set(sentences)

    # Step 2: with the sentences removed as well nothing may remain.
    for sentence in sentences:
        cas.remove_annotation(sentence)
    remaining = list(cas.select("uima.tcas.Annotation"))
    assert set(remaining) == set()
def test_select_covered_overlapping(small_typesystem_xml, tokens, sentences):
    """Two annotations with identical offsets inside a sentence must both be returned, in order."""
    cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
    ts = cas.typesystem

    AnnotationType = ts.create_type("test.Annotation")
    SentenceType = ts.get_type("cassis.Sentence")

    covering_sentence = SentenceType(begin=0, end=10)
    # Two distinct instances sharing the same begin/end offsets.
    twins = [AnnotationType(begin=0, end=5) for _ in range(2)]

    cas.add_annotation(covering_sentence)
    cas.add_annotations(twins)

    covered = list(cas.select_covered("test.Annotation", covering_sentence))

    assert covered == twins
def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
    """Every token selected back from the CAS must have a non-``None`` ``xmiID``."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem)
    TokenType = typesystem.get_type("cassis.Token")

    # (begin, end, id, pos) for each token; built locally, the ``tokens``
    # fixture value is not used.
    token_specs = [
        (0, 3, "0", "NNP"),
        (4, 10, "1", "VBD"),
        (11, 14, "2", "IN"),
        (15, 18, "3", "DT"),
        (19, 24, "4", "NN"),
        (25, 26, "5", "."),
    ]
    fresh_tokens = [
        TokenType(begin=begin, end=end, id=token_id, pos=pos)
        for begin, end, token_id, pos in token_specs
    ]

    for fresh_token in fresh_tokens:
        cas.add_annotation(fresh_token)

    selected = list(cas.select(TokenType.name))

    assert all(selected_token.xmiID is not None for selected_token in selected)