示例#1
0
def test_rootlexicon(dict_item):
    """Add a single item to an empty ``RootLexicon`` and look it up again.

    Verifies, in order: membership in ``item_set``, presence of the item's
    id in ``id_dict``, retrieval through ``get_item_by_id`` and a resulting
    length of one.
    """
    lexicon = RootLexicon()
    lexicon.add(dict_item)

    assert dict_item in lexicon.item_set

    item_id = dict_item.id_
    assert item_id in lexicon.id_dict

    fetched = lexicon.get_item_by_id(item_id)
    assert fetched == dict_item
    assert len(lexicon) == 1
示例#2
0
 def __init__(self, lexicon=None, formatter=None):
     """Set up the lexicon, morphotactics, analyzer and formatter.

     :param lexicon: lexicon to use; when ``None`` the result of
         ``RootLexicon.default_text_dictionaries()`` is used instead.
     :param formatter: key into ``MorphAnalyzer.formatters``; when ``None``
         a ``DefaultFormatter(True)`` is used instead.
     """
     if lexicon is None:
         lexicon = RootLexicon.default_text_dictionaries()
     self.lexicon = lexicon
     self.morphotactics = TurkishMorphotactics(self.lexicon)
     self.analyzer = RuleBasedAnalyzer(self.morphotactics)

     if formatter is None:
         self.formatter = DefaultFormatter(True)
     else:
         # Look the factory up by key, then call it with no arguments.
         formatter_factory = MorphAnalyzer.formatters[formatter]
         self.formatter = formatter_factory()
示例#3
0
def test_stem_transition():
    """Check the stem transition produced for the adjective entry 'beyaz'.

    Builds a one-line lexicon, constructs the morphotactics on top of it and
    inspects the first stem transition whose prefix matches 'beyaz': its
    source and target states, string form, condition, dictionary item and
    phonetic attributes.
    """
    from zeyrek.attributes import calculate_phonetic_attributes
    word_line = 'beyaz [P:Adj]'
    lexicon = RootLexicon.from_lines([word_line])
    morphotactics = TurkishMorphotactics(lexicon=lexicon)
    # Not referenced below; the [0] indexing still exercises the lookup
    # (presumably failing early if the lexicon has no entry for the word).
    dict_item = lexicon.get_matching_items('beyaz')[0]
    transition = morphotactics.stem_transitions.prefix_matches('beyaz')[0]
    assert transition.to_ == adjectiveRoot_ST
    assert str(
        transition) == "<(Dict: beyaz [P:Adj]):beyaz → [adjectiveRoot_ST:Adj]>"
    assert transition.condition is None
    assert transition.condition_count == 0
    assert transition.dict_item.lemma == 'beyaz'
    assert transition.from_ is root_S

    calculated_attrs = calculate_phonetic_attributes('beyaz')
    assert transition.attrs == calculated_attrs
    # isinstance() is the idiomatic type check (PEP 8) and replaces the
    # former ``type(...) == MorphemeState`` comparison.
    assert isinstance(transition.to_, MorphemeState)
示例#4
0
def lex_from_lines():
    """Return a ``RootLexicon`` built from four sample dictionary lines."""
    sample_lines = ["adak", "elma", "beyaz [P:Adj]", "meyve"]
    return RootLexicon.from_lines(sample_lines)
示例#5
0
"""Tests for `zeyrek.conditions` module."""
import pytest
from zeyrek.attributes import RootAttribute, PhoneticAttribute, \
    calculate_phonetic_attributes
from zeyrek.conditions import CombinedCondition, has, HasRootAttribute, DictionaryItemIs, not_have, \
    HasPhoneticAttribute, DictionaryItemIsAny, NoSurfaceAfterDerivation, HasAnySuffixSurface, HasTail, \
    PreviousMorphemeIs, PreviousStateIs, LastDerivationIs, HasDerivation, PreviousStateIsNot, HasTailSequence, \
    ContainsMorphemeSequence, LastDerivationIsAny, PreviousGroupContains, CurrentGroupContainsAny, \
    PreviousGroupContainsMorpheme, ContainsMorpheme, PreviousMorphemeIsAny, PreviousStateIsAny
from zeyrek.lexicon import RootLexicon
from zeyrek.morphology import MorphAnalyzer
from zeyrek.morphotactics import SearchPath, StemTransition, noun_S, SurfaceTransition, SuffixTransition, \
    adjectiveRoot_ST, verbRoot_S, become_S, vPast_S, past, verb, vCausTir_S, \
    nom_ST, vAgt_S, a3sg_S, pnon_S, morphemes, agt, a3sg, noun, pnon, nom, vPass_S, vAble_S

# Module-level lexicon built at import time from four dictionary lines;
# only "beyaz" carries an explicit [P:Adj] tag.
lex = RootLexicon.from_lines(["adak", "elma", "beyaz [P:Adj]", "meyve"])


@pytest.fixture(scope='session')
def lex_from_lines():
    """Session-scoped fixture: a ``RootLexicon`` built from four lines."""
    dictionary_lines = ["adak", "elma", "beyaz [P:Adj]", "meyve"]
    small_lexicon = RootLexicon.from_lines(dictionary_lines)
    return small_lexicon


@pytest.fixture(scope='session')
def mt_lexicon():
    """Session-scoped fixture: the lexicon of a default ``MorphAnalyzer``.

    The analyzer is created with no arguments and only its ``lexicon``
    attribute is handed back (the original note says constructing it
    connects the morphotactics graph).
    """
    return MorphAnalyzer().lexicon


@pytest.fixture(scope='session')
示例#6
0
def test_default_lexicon():
    """The default text dictionaries contain 'elma_Noun' and are non-empty."""
    default_lexicon = RootLexicon.default_text_dictionaries()

    apple_entry = default_lexicon.get_item_by_id('elma_Noun')
    assert apple_entry is not None

    entry_count = len(default_lexicon)
    assert entry_count > 0