def _parse_sofa(self, elem) -> Sofa:
    """Create a ``Sofa`` from the attributes of ``elem``.

    The namespaced XMI ``id`` attribute is moved to an integer ``xmiID``
    entry and ``sofaNum`` is converted to ``int``; every other attribute is
    passed to ``Sofa`` unchanged as a keyword argument.

    As a side effect, ``self._max_xmi_id`` and ``self._max_sofa_num`` are
    updated to the larger of their current value and the value read here.

    Args:
        elem: Object exposing an ``attrib`` mapping (presumably an XML
            element -- confirm against the caller).

    Returns:
        The ``Sofa`` built from the converted attributes.

    Raises:
        KeyError: If the XMI ``id`` or the ``sofaNum`` attribute is missing.
    """
    # Work on a copy so the element's own attribute mapping stays untouched.
    fields = dict(elem.attrib)

    xmi_id = int(fields.pop("{http://www.omg.org/XMI}id"))
    sofa_num = int(fields["sofaNum"])
    fields["xmiID"] = xmi_id
    fields["sofaNum"] = sofa_num

    # Keep the running maxima current only after both conversions succeeded.
    self._max_xmi_id = max(xmi_id, self._max_xmi_id)
    self._max_sofa_num = max(sofa_num, self._max_sofa_num)

    return Sofa(**fields)
def _parse_sofa(self, typesystem: TypeSystem, elem) -> Sofa:
    """Create a ``Sofa`` from the attributes of ``elem``.

    The namespaced XMI ``id`` attribute is moved to an integer ``xmiID``
    entry, ``sofaNum`` is converted to ``int`` and a ``type`` entry is added
    with the result of ``typesystem.get_type(TYPE_NAME_SOFA)``. All entries
    are then passed to ``Sofa`` as keyword arguments.

    As a side effect, ``self._max_xmi_id`` and ``self._max_sofa_num`` are
    updated to the larger of their current value and the value read here.

    Args:
        typesystem: Type system queried for the sofa type.
        elem: Object exposing an ``attrib`` mapping (presumably an XML
            element -- confirm against the caller).

    Returns:
        The ``Sofa`` built from the converted attributes.

    Raises:
        KeyError: If the XMI ``id`` or the ``sofaNum`` attribute is missing.
    """
    # Work on a copy so the element's own attribute mapping stays untouched.
    sofa_kwargs = dict(elem.attrib)

    xmi_id = int(sofa_kwargs.pop("{http://www.omg.org/XMI}id"))
    sofa_num = int(sofa_kwargs["sofaNum"])
    sofa_kwargs["xmiID"] = xmi_id
    sofa_kwargs["sofaNum"] = sofa_num
    sofa_kwargs["type"] = typesystem.get_type(TYPE_NAME_SOFA)

    # The maxima are only touched once the type lookup has succeeded.
    self._max_xmi_id = max(xmi_id, self._max_xmi_id)
    self._max_sofa_num = max(sofa_num, self._max_sofa_num)

    return Sofa(**sofa_kwargs)
def test_get_covered_text_sentences(sentences):
    """Check that ``get_covered_text`` returns the text of each sentence."""
    document = Sofa(sofaNum=1, sofaString='Joe waited for the train . The train was late .')
    cas = Cas(annotations=sentences, sofas=[document])

    expected_text = ['Joe waited for the train .', 'The train was late .']

    # Query the covered text one annotation at a time, in fixture order.
    actual_text = []
    for annotation in sentences:
        actual_text.append(cas.get_covered_text(annotation))

    assert actual_text == expected_text
def test_get_covered_text_tokens(tokens):
    """Check that ``get_covered_text`` returns the text of each token."""
    document = Sofa(sofaNum=1, sofaString='Joe waited for the train . The train was late .')
    cas = Cas(annotations=tokens, sofas=[document])

    expected_text = [
        'Joe', 'waited', 'for', 'the', 'train', '.',
        'The', 'train', 'was', 'late', '.',
    ]

    # Query the covered text one annotation at a time, in fixture order.
    actual_text = []
    for annotation in tokens:
        actual_text.append(cas.get_covered_text(annotation))

    assert actual_text == expected_text
def test_add_annotation(small_typesystem_xml):
    """Check that annotations added one by one are returned by ``select``."""
    document = Sofa(sofaNum=1, sofaString='Joe waited for the train .')
    cas = Cas(sofas=[document])
    typesystem = load_typesystem(small_typesystem_xml)
    TokenType = typesystem.get_type('cassis.Token')

    # One row per token: (xmiID, begin, end, id, pos).
    token_rows = [
        (13, 0, 3, '0', 'NNP'),
        (19, 4, 10, '1', 'VBD'),
        (25, 11, 14, '2', 'IN'),
        (31, 15, 18, '3', 'DT'),
        (37, 19, 24, '4', 'NN'),
        (43, 25, 26, '5', '.'),
    ]
    tokens = [
        TokenType(xmiID=xmi_id, sofa=1, begin=begin, end=end, id=token_id, pos=pos)
        for xmi_id, begin, end, token_id, pos in token_rows
    ]

    for annotation in tokens:
        cas.add_annotation(annotation)

    actual_tokens = list(cas.select(TokenType.name))

    assert actual_tokens == tokens
def _parse_sofa(self, elem) -> Sofa:
    """Create a ``Sofa`` from the attributes of ``elem``.

    The namespaced XMI ``id`` attribute is moved to an integer ``xmiID``
    entry and ``sofaNum`` is converted to ``int``; every other attribute is
    passed to ``Sofa`` unchanged as a keyword argument.

    Args:
        elem: Object exposing an ``attrib`` mapping (presumably an XML
            element -- confirm against the caller).

    Returns:
        The ``Sofa`` built from the converted attributes.

    Raises:
        KeyError: If the XMI ``id`` or the ``sofaNum`` attribute is missing.
    """
    # Work on a copy so the element's own attribute mapping stays untouched.
    sofa_kwargs = dict(elem.attrib)

    raw_xmi_id = sofa_kwargs.pop('{http://www.omg.org/XMI}id')
    sofa_kwargs['xmiID'] = int(raw_xmi_id)

    raw_sofa_num = sofa_kwargs['sofaNum']
    sofa_kwargs['sofaNum'] = int(raw_sofa_num)

    return Sofa(**sofa_kwargs)
def _parse_sofa(self, elem) -> Sofa:
    """Build a ``Sofa`` out of the attribute mapping of ``elem``.

    The attributes are copied, the namespaced XMI ``id`` entry is replaced
    by an integer ``xmiID`` entry, and ``sofaNum`` is turned into an ``int``.
    The resulting mapping is passed to ``Sofa`` as keyword arguments.

    Args:
        elem: Object exposing an ``attrib`` mapping (presumably an XML
            element -- confirm against the caller).

    Returns:
        The ``Sofa`` built from the converted attributes.

    Raises:
        KeyError: If the XMI ``id`` or the ``sofaNum`` attribute is missing.
    """
    # Copy first: the pop below must not alter the element itself.
    fields = dict(elem.attrib)

    xmi_id = int(fields.pop("{http://www.omg.org/XMI}id"))
    fields["xmiID"] = xmi_id

    sofa_num = int(fields["sofaNum"])
    fields["sofaNum"] = sofa_num

    return Sofa(**fields)