Пример #1
0
def beyazlastirici_paths(mt_lexicon):
    """Build the step-by-step search paths for "beyaz" + "laştırıcı".

    Starting from the ``beyaz_Adj`` lexicon entry, one suffix state is
    appended at a time, producing a new path per step.  Every path is
    printed before the list is returned.

    :param mt_lexicon: lexicon queried with ``get_item_by_id('beyaz_Adj')``.
    :return: list of paths; the first is the initial path and each later
        one extends its predecessor by a single surface transition.
    """
    root_item = mt_lexicon.get_item_by_id('beyaz_Adj')
    assert root_item is not None

    # (target state, surface produced on entering it); '' marks an
    # epsilon step.
    suffix_steps = [
        (become_S, 'laş'),
        (verbRoot_S, ''),
        (vCausTir_S, 'tır'),
        (verbRoot_S, ''),
        (vAgt_S, 'ıcı'),
        (noun_S, ''),
        (a3sg_S, ''),
        (pnon_S, ''),
        (nom_ST, ''),
    ]

    stem_transition = StemTransition(root_item, adjectiveRoot_ST)
    # Collected alongside the paths; not part of the return value.
    surface_transitions = [
        SurfaceTransition(stem_transition.surface, stem_transition)
    ]
    initial_path = SearchPath.initial(stem_transition, 'laştırıcı')
    paths = [initial_path]

    source_state = adjectiveRoot_ST
    current_attrs = initial_path.phonetic_attributes
    for target_state, surface in suffix_steps:
        step = SurfaceTransition(
            surface, SuffixTransition(source_state, target_state))
        surface_transitions.append(step)
        # Attributes of each step are derived from those of the previous one.
        current_attrs = calculate_phonetic_attributes(
            surface, frozenset(current_attrs))
        paths.append(paths[-1].copy(step, current_attrs))
        source_state = target_state

    # < (beyaz_Adj)(-)(beyaz: adjectiveRoot_ST + laş:become_S + verbRoot_S + tır: vCausTır_S + verbRoot_S
    # + ıcı:vAgt_S + noun_S + a3sg_S + pnon_S + nom_ST) >
    for built_path in paths:
        print(built_path)
    return paths
Пример #2
0
def test_initial_search_path():
    """An initial search path must reference the stem transition it came from."""
    from zeyrek.attributes import calculate_phonetic_attributes

    surface = "beyazlaşacak"
    phonetic_attrs = calculate_phonetic_attributes(surface)
    beyaz = DictionaryItem(
        "beyaz",
        "beyaz",
        PrimaryPos.Adjective,
        SecondaryPos.NONE,
        [],
        "beyaz",
        0,
    )
    stem = StemTransition(beyaz, root_S, phonetic_attrs, surface)
    assert stem.dict_item.lemma == "beyaz"

    # "laşacak" is the tail handed to the initial path.
    initial = SearchPath.initial(stem, "laşacak")
    assert initial.stem_transition == stem
Пример #3
0
def dict_item():
    """Return a ``DictionaryItem`` for the noun "ev".

    The same string is used as lemma, root and pronunciation; the
    attributes come from ``calculate_phonetic_attributes`` and the index
    is 0.
    """
    stem = 'ev'
    return DictionaryItem(
        stem,  # lemma
        stem,  # root
        PrimaryPos.Noun,
        SecondaryPos.NONE,
        calculate_phonetic_attributes(stem),
        stem,  # pronunciation
        0,  # index
    )
Пример #4
0
def searchpath_dict_item_with_tail_and_RA_voicing(mt_lexicon):
    """Create an initial search path for ``adak_Noun`` with surface "adağ".

    The stem transition targets ``noun_S`` and uses phonetic attributes
    computed from the item's pronunciation; the path is started with the
    tail "a".  Both the transition and the path are printed.

    :param mt_lexicon: lexicon queried with ``get_item_by_id('adak_Noun')``.
    :return: the initial search path.
    """
    # NOTE(review): the original comment also listed elma, beyaz, meyve —
    # presumably other ids available in this lexicon; confirm.
    item = mt_lexicon.get_item_by_id('adak_Noun')
    attrs = calculate_phonetic_attributes(item.pronunciation)
    stem_transition = StemTransition(item, noun_S, attrs, surface='adağ')
    print(f"Stem transition: {stem_transition}")

    initial_path = SearchPath.initial(stem_transition, 'a')
    print(f"Path {initial_path}")
    return initial_path
Пример #5
0
def test_stem_transition():
    """Check the stem transition generated for the lexicon line "beyaz [P:Adj]"."""
    from zeyrek.attributes import calculate_phonetic_attributes

    lexicon = RootLexicon.from_lines(['beyaz [P:Adj]'])
    morphotactics = TurkishMorphotactics(lexicon=lexicon)
    # Result is unused, but indexing fails if the lexicon has no match.
    dict_item = lexicon.get_matching_items('beyaz')[0]

    matches = morphotactics.stem_transitions.prefix_matches('beyaz')
    stem = matches[0]

    # Endpoints and textual form of the transition.
    assert stem.to_ == adjectiveRoot_ST
    rendered = str(stem)
    assert rendered == "<(Dict: beyaz [P:Adj]):beyaz → [adjectiveRoot_ST:Adj]>"

    # No condition is attached to it.
    assert stem.condition is None
    assert stem.condition_count == 0

    assert stem.dict_item.lemma == 'beyaz'
    assert stem.from_ is root_S

    # Attributes must equal those computed directly from the word.
    expected_attrs = calculate_phonetic_attributes('beyaz')
    assert stem.attrs == expected_attrs
    assert type(stem.to_) == MorphemeState
Пример #6
0
    def advance(self, path: SearchPath):
        """Generate every path reachable from ``path`` through one transition.

        Each outgoing transition of ``path.current_state`` is tried and is
        skipped when:

        * the tail is empty but the transition has a surface form,
        * the generated surface is not a prefix of ``path.tail``, or
        * ``transition.can_pass(path)`` rejects the path.

        Accepted transitions yield a new path via ``path.copy``.  The
        phonetic attributes of ``path`` itself are never mutated here: they
        are copied before being adjusted for the new path.

        :param path: the search path to extend.
        :return: list of new paths, one per accepted transition (may be
            empty).
        """
        new_paths = []
        for transition in path.current_state.outgoing:
            # An exhausted tail cannot match a transition that has a surface.
            if not path.tail and transition.has_surface_form:
                logging.debug(
                    "Rejecting path %s: Path and transition surface mismatch: ",
                    path)
                continue

            surface = generate_surface(transition, path.phonetic_attributes)

            # No need to go further if the generated surface form is not a
            # prefix of the path's tail.
            if not path.tail.startswith(surface):
                logging.debug(
                    "Rejecting path %s: tail doesnt start with %s-%s",
                    path, path.tail, surface)
                continue

            # Check conditions.
            if not transition.can_pass(path):
                logging.debug(
                    "Rejecting path %s-%s: can't pass", path, transition)
                continue

            # Epsilon (empty) transition: add it and reuse the existing
            # attributes unchanged.
            if not transition.has_surface_form:
                blank_surface_transition = SurfaceTransition("", transition)
                new_path = path.copy(blank_surface_transition,
                                     path.phonetic_attributes)
                new_paths.append(new_path)
                logging.debug("Appending path %s", new_path)
                continue

            surface_transition = SurfaceTransition(surface, transition)

            # If the tail equals the surface there is no need to recalculate
            # the phonetic attributes.  Take a copy, though: the set is
            # mutated below, and mutating path.phonetic_attributes in place
            # would leak into the remaining transitions of this loop and into
            # the incoming path itself.
            if path.tail == surface:
                attributes = set(path.phonetic_attributes)
            else:
                attributes = calculate_phonetic_attributes(
                    surface, frozenset(path.phonetic_attributes))

            # This is required for suffixes like `cik` and `ciğ`:
            # an extra attribute is added if "cik" or "ciğ" is generated and
            # matches the tail.  If "cik" is generated, ExpectsConsonant is
            # added, so only a consonant-starting suffix can follow.
            # Likewise, if "ciğ" is produced, a vowel-starting suffix is
            # expected.
            # discard() is a no-op when the attribute is absent.
            attributes.discard(PhoneticAttribute.CannotTerminate)
            last_token = transition.last_template_token
            if last_token.type_ == 'LAST_VOICED':
                attributes.add(PhoneticAttribute.ExpectsConsonant)
            elif last_token.type_ == 'LAST_NOT_VOICED':
                attributes.add(PhoneticAttribute.ExpectsVowel)
                attributes.add(PhoneticAttribute.CannotTerminate)

            p = path.copy(surface_transition, attributes)
            logging.debug("P path: %s", p)
            new_paths.append(p)

        logging.debug("FINAL: ")
        for i, p in enumerate(new_paths):
            logging.debug("\t %s: %s", i, p)
        return new_paths