Пример #1
0
def test_tag_partially():
    """Check analysis when a tagger covers only one of two words.

    A ``ConstantTagger`` configured for ``'world'`` is the only tagger, so
    the expectation is that ``'world'`` carries the adjective tag while
    ``'hello'`` is reported with ``_UNKNOWN``. Both lemmas are expected to
    equal the words themselves.
    """
    adjective = Tag(pos=PartOfSpeech.ADJECTIVE)

    expected_morphs = [
        Morph(word='hello', lemma='hello', tag=_UNKNOWN),
        Morph(word='world', lemma='world', tag=adjective),
    ]
    world_tagger = ConstantTagger(word='world', tag=adjective)
    words = ['hello', 'world']

    _assert_analyzed_equal(
        expected=expected_morphs,
        taggers=[world_tagger],
        text=words,
    )
Пример #2
0
def test_tag():
    """Check analysis of a single word that the only tagger covers.

    The expected result is one ``Morph`` for ``'hello'`` carrying the noun
    tag supplied to the ``ConstantTagger``, with the lemma equal to the word.
    """
    noun = Tag(pos=PartOfSpeech.NOUN)

    expected_morphs = [Morph(word='hello', lemma='hello', tag=noun)]
    hello_tagger = ConstantTagger(word='hello', tag=noun)

    _assert_analyzed_equal(
        expected=expected_morphs,
        taggers=[hello_tagger],
        text=['hello'],
    )
Пример #3
0
def test_unknown():
    """Check analysis of a single word when no taggers are configured.

    With an empty tagger list the word is expected to come back tagged
    ``_UNKNOWN`` and with the lemma equal to the word itself.
    """
    untagged_hello = Morph(word='hello', lemma='hello', tag=_UNKNOWN)

    _assert_analyzed_equal(
        expected=[untagged_hello],
        taggers=[],
        text=['hello'],
    )
Пример #4
0
    def analyze(self, text: Text) -> Iterable[Morph]:
        """Lazily yield one ``Morph`` per word of ``text``, in order.

        Taggers from ``self._taggers`` are consulted in sequence. Each one
        receives the full text plus the indices that have no tag yet, and
        whatever it returns is merged into the collected tags. Consultation
        stops as soon as no untagged index remains. Words that end up without
        a tag get ``_UNKNOWN``. The lemma of every word comes from
        ``self._lemmatizer.lemmatize(word, tag)``.

        This is a generator: nothing runs until the result is iterated.
        """
        resolved: Dict[Index, Tag] = {}

        word_count = len(text)
        pending: Sequence[int] = range(word_count)
        for tagger in self._taggers:
            resolved.update(tagger.tag(text, pending))
            # Narrow the work list so the next tagger is asked only about
            # positions nobody has tagged so far.
            pending = [
                position for position in pending if position not in resolved
            ]
            if len(pending) == 0:
                break

        lemmatizer = self._lemmatizer
        for position, word in enumerate(text):
            # Positions no tagger claimed fall back to the unknown tag.
            tag = resolved.get(position, _UNKNOWN)
            lemma = lemmatizer.lemmatize(word, tag)
            yield Morph(word, lemma, tag)