Пример #1
0
    def _parse_sofa(self, elem) -> Sofa:
        """Create a ``Sofa`` from the attributes of ``elem``.

        The ``{http://www.omg.org/XMI}id`` attribute is removed, converted to
        an integer and passed on as ``xmiID``; ``sofaNum`` is converted to an
        integer as well. All remaining attributes are forwarded to ``Sofa``
        unchanged. The largest ``xmiID`` and ``sofaNum`` seen so far are kept
        in ``self._max_xmi_id`` and ``self._max_sofa_num``.

        Args:
            elem: Object whose ``attrib`` mapping describes the sofa.

        Returns:
            The ``Sofa`` constructed from the converted attributes.

        Raises:
            KeyError: If the XMI id or the ``sofaNum`` attribute is missing.
        """
        sofa_kwargs = dict(elem.attrib)
        xmi_id = int(sofa_kwargs.pop("{http://www.omg.org/XMI}id"))
        sofa_num = int(sofa_kwargs["sofaNum"])
        sofa_kwargs.update(xmiID=xmi_id, sofaNum=sofa_num)

        # Counters are only advanced once both conversions have succeeded.
        self._max_xmi_id = max(xmi_id, self._max_xmi_id)
        self._max_sofa_num = max(sofa_num, self._max_sofa_num)

        return Sofa(**sofa_kwargs)
Пример #2
0
    def _parse_sofa(self, typesystem: TypeSystem, elem) -> Sofa:
        """Create a typed ``Sofa`` from the attributes of ``elem``.

        The ``{http://www.omg.org/XMI}id`` attribute is removed, converted to
        an integer and passed on as ``xmiID``; ``sofaNum`` is converted to an
        integer. The ``type`` keyword is set to whatever
        ``typesystem.get_type(TYPE_NAME_SOFA)`` returns. The largest ``xmiID``
        and ``sofaNum`` seen so far are kept in ``self._max_xmi_id`` and
        ``self._max_sofa_num``.

        Args:
            typesystem: Type system queried for the sofa type.
            elem: Object whose ``attrib`` mapping describes the sofa.

        Returns:
            The ``Sofa`` constructed from the converted attributes.

        Raises:
            KeyError: If the XMI id or the ``sofaNum`` attribute is missing.
        """
        sofa_kwargs = dict(elem.attrib)

        xmi_id = int(sofa_kwargs.pop("{http://www.omg.org/XMI}id"))
        sofa_kwargs["xmiID"] = xmi_id

        sofa_num = int(sofa_kwargs["sofaNum"])
        sofa_kwargs["sofaNum"] = sofa_num

        sofa_kwargs["type"] = typesystem.get_type(TYPE_NAME_SOFA)

        # Counters are only advanced after every lookup above has succeeded.
        self._max_xmi_id = max(xmi_id, self._max_xmi_id)
        self._max_sofa_num = max(sofa_num, self._max_sofa_num)

        return Sofa(**sofa_kwargs)
Пример #3
0
def test_get_covered_text_sentences(sentences):
    """Check ``Cas.get_covered_text`` for every item of the ``sentences`` fixture.

    A ``Cas`` is built from the fixture annotations and a single sofa holding
    the two-sentence text; the covered texts must equal the two sentences.
    """
    text = 'Joe waited for the train . The train was late .'
    document = Sofa(sofaNum=1, sofaString=text)
    cas = Cas(annotations=sentences, sofas=[document])

    covered = list(map(cas.get_covered_text, sentences))

    assert covered == ['Joe waited for the train .', 'The train was late .']
Пример #4
0
def test_get_covered_text_tokens(tokens):
    """Check ``Cas.get_covered_text`` for every item of the ``tokens`` fixture.

    A ``Cas`` is built from the fixture annotations and a single sofa holding
    the two-sentence text; the covered texts must equal the eleven expected
    token strings, in order.
    """
    expected = [
        'Joe',
        'waited',
        'for',
        'the',
        'train',
        '.',
        'The',
        'train',
        'was',
        'late',
        '.',
    ]
    document = Sofa(
        sofaNum=1,
        sofaString='Joe waited for the train . The train was late .',
    )
    cas = Cas(annotations=tokens, sofas=[document])

    covered = []
    for annotation in tokens:
        covered.append(cas.get_covered_text(annotation))

    assert covered == expected
Пример #5
0
def test_add_annotation(small_typesystem_xml):
    """Check that annotations added one by one are returned by ``select``.

    Six ``cassis.Token`` annotations are created from the type system loaded
    from ``small_typesystem_xml``, added to a ``Cas`` via ``add_annotation``,
    and then compared against ``cas.select`` for the token type name.
    """
    document = Sofa(sofaNum=1, sofaString='Joe waited for the train .')
    cas = Cas(sofas=[document])
    TokenType = load_typesystem(small_typesystem_xml).get_type('cassis.Token')

    # One row per token: (xmiID, begin, end, id, pos).
    token_rows = [
        (13, 0, 3, '0', 'NNP'),
        (19, 4, 10, '1', 'VBD'),
        (25, 11, 14, '2', 'IN'),
        (31, 15, 18, '3', 'DT'),
        (37, 19, 24, '4', 'NN'),
        (43, 25, 26, '5', '.'),
    ]
    tokens = [
        TokenType(xmiID=xmi_id, sofa=1, begin=begin, end=end, id=token_id, pos=pos)
        for xmi_id, begin, end, token_id, pos in token_rows
    ]

    for annotation in tokens:
        cas.add_annotation(annotation)

    assert list(cas.select(TokenType.name)) == tokens
Пример #6
0
 def _parse_sofa(self, elem) -> Sofa:
     """Create a ``Sofa`` from the attributes of ``elem``.

     The ``{http://www.omg.org/XMI}id`` attribute is removed, converted to an
     integer and passed on as ``xmiID``; ``sofaNum`` is converted to an
     integer. All remaining attributes are forwarded to ``Sofa`` unchanged.

     Raises:
         KeyError: If the XMI id or the ``sofaNum`` attribute is missing.
     """
     sofa_kwargs = dict(elem.attrib)
     sofa_kwargs.update(
         xmiID=int(sofa_kwargs.pop('{http://www.omg.org/XMI}id')),
         sofaNum=int(sofa_kwargs['sofaNum']),
     )
     return Sofa(**sofa_kwargs)
Пример #7
0
 def _parse_sofa(self, elem) -> Sofa:
     """Create a ``Sofa`` from the attributes of ``elem``.

     The ``{http://www.omg.org/XMI}id`` attribute is removed, converted to an
     integer and passed on as ``xmiID``; ``sofaNum`` is converted to an
     integer. All remaining attributes are forwarded to ``Sofa`` unchanged.

     Raises:
         KeyError: If the XMI id or the ``sofaNum`` attribute is missing.
     """
     raw = dict(elem.attrib)

     xmi_id = raw.pop("{http://www.omg.org/XMI}id")
     raw["xmiID"] = int(xmi_id)

     sofa_num = raw["sofaNum"]
     raw["sofaNum"] = int(sofa_num)

     return Sofa(**raw)