Пример #1
0
def _sentence_tagged_words(sent_elem):
    """Return ``(word, tag)`` pairs for every token under *sent_elem*.

    The word is the token's ``text`` attribute.  The tag is the
    comma-joined grammeme list of the first ``l`` element matched by
    ``'*//l'`` below the token (only that first match is used).
    """
    def _tagged(token):
        # Word first, then the first parse, mirroring the lookup order.
        word = text_type(token.get('text'))
        first_parse = token.find('*//l')
        return word, text_type(',').join(_grammemes(first_parse))

    return [_tagged(token) for token in sent_elem.findall('*//token')]
Пример #2
0
def _sentence_parsed_words(sent_elem):
    """Return ``(word, annotations)`` pairs for every token under *sent_elem*.

    ``annotations`` holds one ``(t, tag)`` tuple per ``l`` element matched
    by ``'*//l'`` below the token: ``t`` is that element's ``t`` attribute
    and ``tag`` is its grammemes joined with commas.
    """
    words = []
    for token in sent_elem.findall('*//token'):
        word = text_type(token.get('text'))
        variants = []
        for parse in token.findall('*//l'):
            normal_form = text_type(parse.get('t'))
            tag = text_type(',').join(_grammemes(parse))
            variants.append((normal_form, tag))
        words.append((word, variants))
    return words
Пример #3
0
def _grammemes(l_element):
    """Return the ``v`` attribute of each direct child of *l_element*.

    :param l_element: an XML element whose children carry a ``v`` attribute.
    :return: list of text values, one per child, in document order.
    """
    # Iterate the element directly instead of calling ``getchildren()``:
    # that method was removed from ``xml.etree.ElementTree`` in Python 3.9,
    # while plain iteration yields the same direct children everywhere.
    return [text_type(grammeme.get('v')) for grammeme in l_element]
Пример #4
0
def _sentence_words(sent_elem):
    """Return the ``text`` attribute of every token under *sent_elem*.

    Tokens are located with the ``'*//token'`` path and returned in the
    order ``findall`` yields them.
    """
    words = []
    for token in sent_elem.findall('*//token'):
        words.append(text_type(token.get('text')))
    return words
Пример #5
0
def _sentence_source(sent_elem):
    """Return the text of the first ``source`` child of *sent_elem*."""
    source_elem = sent_elem.find('source')
    raw_text = source_elem.text
    return text_type(raw_text)
Пример #6
0
 def categories(self):
     """Return the text of every ``tag`` element matched by ``'tags//tag'``
     under ``self.root``, as a list."""
     names = []
     for tag_elem in self.root.findall('tags//tag'):
         names.append(text_type(tag_elem.text))
     return names
Пример #7
0
 def title(self):
     """Return the ``name`` attribute of ``self.root`` as text."""
     name_attr = self.root.get('name')
     return text_type(name_attr)