示例#1
0
    def find_suffix(self, word):
        """Expand the stem candidates of ``word`` with their suffix analyses.

        ``word`` is lower-cased with ``to_lower`` and handed to
        ``self.arr_word_list``; every candidate it yields is processed as
        follows:

        * a candidate with at most one element is copied to the output
          unchanged;
        * otherwise its last element is taken as the remaining suffix text
          and its first element as the tagged stem.  The stem is cleaned
          with ``dstem_temizleyici`` and the text between the first ``(``
          and the first ``)`` of the tagged stem is used as the stem type.
          ``self.find_from_list`` is then queried with the last character
          of the cleaned stem, the suffix text and the stem type, and each
          returned item is concatenated onto the candidate minus its last
          element.

        A candidate whose suffix text is empty is additionally copied to
        the output as-is before the lookup is attempted.

        :param word: the word to analyse; anything whose exact type is not
            ``str`` produces an empty result.
        :return: list of expanded candidates (empty when there are none).
        """
        # Exact type check on purpose: this mirrors the established contract.
        if type(word) is not str:
            return []

        candidates = self.arr_word_list(to_lower(word))
        if not candidates:
            return []

        analyses = []
        for candidate in candidates:
            # Nothing after the stem: keep the candidate untouched.
            if len(candidate) <= 1:
                analyses.append(candidate)
                continue

            remainder = candidate[-1]
            if not remainder:
                # Empty suffix text: keep the candidate, but still fall
                # through to the lookup below.
                analyses.append(candidate)

            tagged_stem = candidate[0]
            bare_stem = dstem_temizleyici.sub('', tagged_stem)
            type_start = tagged_stem.find('(') + 1
            type_end = tagged_stem.find(')')
            stem_kind = tagged_stem[type_start:type_end]

            matches = self.find_from_list(bare_stem[-1], remainder, stem_kind)
            if matches:
                head = candidate[:-1]
                analyses.extend(head + match for match in matches)
        return analyses
示例#2
0
 def __init__(self, word):
     """Analyse ``word`` and store the outcome on the instance.

     The word is normalised with ``to_lower`` and ``replace_cap_letter``
     and then run through ``self.find_stem`` and ``self.find_suffix``.

     Attributes set:

     * ``word`` - the normalised word.
     * ``stem_list`` - the value returned by ``self.find_stem()``.
     * ``stems`` - the distinct first elements of the analyses returned
       by ``self.find_suffix`` (order is not defined).
     * ``result`` - the distinct analyses, each joined with ``'+'``
       (order is not defined).

     An empty or non-alphabetic word is not analysed: ``stem_list``,
     ``stems`` and ``result`` are all left as empty lists.

     :param word: the word to analyse.
     """
     word = replace_cap_letter(to_lower(word))
     self.word = word
     self.stems = []

     # ``str.isalpha`` has to be *called*; comparing the bound method with
     # ``False`` is never true.  Return early so the empty values are not
     # overwritten by the analysis below.
     if not word or not word.isalpha():
         self.stem_list = []
         self.result = []
         return

     self.stem_list = self.find_stem()
     analyses = self.find_suffix(word)
     if analyses:
         self.stems = list({analysis[0] for analysis in analyses})
     self.result = list({'+'.join(analysis) for analysis in analyses})
示例#3
0
    def find_stem(self):
        """Collect dictionary entries that can start ``self.word``.

        Steps:

        1. ``spellword`` splits the word; only its first item is used as a
           prefix to pre-filter ``sozluk``.  If nothing matches and that
           prefix ends in b/c/d/g/ğ, the final letter is swapped for
           p/ç/t/k/k and the filter is retried; the same substitution is
           then applied to the working copy of the word.
        2. Words starting with ``di`` / ``yi`` get a fixed candidate that
           points at the verbs ``de`` / ``ye``.
        3. If the word contains ı/i/u/ü directly followed by ``yor``, a verb
           candidate is prepared whose lemma ends in ``a`` (for ı/u) or
           ``e`` (for i/ü) instead of that vowel.
        4. Every pre-filtered dictionary entry is compared with the word
           and matching candidates are recorded.

        Each candidate is a 3-tuple whose last element is the sort key:
        ``0`` for exact/special matches, otherwise the value returned by
        ``lddistance``.  The second element is either the entry type or a
        ``(dictionary form, type)`` pair.

        :return: the candidates passed through ``unrepeated_list`` and
            sorted ascending by their third element; ``[]`` when
            ``spellword`` returns ``False`` or no dictionary entry shares
            the prefix.
        """
        word = self.word
        syllables = spellword(word)
        if syllables is False:
            return []
        first_syllable = syllables[0]

        def entries_starting_with(prefix):
            # Dictionary rows whose first field begins with ``prefix``.
            return [entry for entry in sozluk if entry[0].startswith(prefix)]

        candidates = entries_starting_with(first_syllable)
        if not candidates and first_syllable[-1] in 'bcdgğ':
            # Retry with the final consonant replaced by its counterpart.
            hardened = {'b': 'p', 'c': 'ç', 'd': 't', 'g': 'k', 'ğ': 'k'}
            first_syllable = (first_syllable[:-1]
                              + hardened[first_syllable[-1]])
            candidates = entries_starting_with(first_syllable)
            if not candidates:
                return []
            word = first_syllable + word[len(first_syllable):]

        found = []
        if word.startswith('di'):
            found.append(('di', ('de', 'fiil'), 0))
        elif word.startswith('yi'):
            found.append(('yi', ('ye', 'fiil'), 0))

        # "<vowel>yor" handling.  ``mstem`` is the surface text up to and
        # including the vowel, ``_mstem`` the same text with the vowel
        # replaced by a/e.  The match always begins with one of ı/i/u/ü, so
        # a single code path covers every position, including position 0.
        mstem = ''
        _mstem = ''
        yor_match = re.search('[ıiuü]yor', word)
        if yor_match:
            vowel_at = yor_match.start()
            base_vowel = 'a' if word[vowel_at] in ('ı', 'u') else 'e'
            _mstem = word[:vowel_at] + base_vowel
            mstem = word[:vowel_at + 1]

        # Text that may directly follow an 'AKR' entry, mapped to the tag
        # appended to the stem for its leading m/n.
        possessive_tags = {
            'mler': '+m(1. Tekil Kişi İyelik Eki){içe-2}',
            'mlar': '+m(1. Tekil Kişi İyelik Eki){içe-2}',
            'nler': '+n(2. Tekil Kişi İyelik Eki){içe-2}',
            'nlar': '+n(2. Tekil Kişi İyelik Eki){içe-2}',
        }
        # Raw string: '\(' is an invalid escape in a normal string literal.
        harmony_pattern = re.compile(r'\((\w+)\)')

        for stem in candidates:
            marker = stem[2]
            surface = stem[0]
            stem_type = stem[1]

            if marker == 'AKR' and word.startswith(surface):
                rest = word[len(surface):]
                tag = possessive_tags.get(rest[:4])
                if tag is not None:
                    # The leading m/n moves into the stem tag, so the
                    # remaining suffix text starts one character later.
                    found.append((surface + '(isim)' + tag, rest[1:], 0))

            lowered = to_lower(surface)
            plain = replace_cap_letter(lowered)

            # Entry types listed in ``not_get_suffix`` only count on an
            # exact match with the whole word.
            if stem_type in not_get_suffix:
                if word == plain:
                    found.append((surface, stem_type, 0))
                continue

            if mstem and stem_type == 'fiil' and _mstem == plain:
                found.append((mstem, (_mstem, stem_type), 0))

            if word == plain:
                found.append((surface, stem_type, 0))
            elif word.startswith(plain):
                found.append((lowered, stem_type, lddistance(word, plain)))

            # An alternative form may be given in parentheses in the third
            # field; only the first such form is considered.
            alternatives = harmony_pattern.findall(marker)
            if alternatives:
                alt_plain = replace_cap_letter(alternatives[0])
                if word.startswith(alt_plain):
                    found.append((alternatives[0], (surface, stem_type),
                                  lddistance(word, alt_plain)))

        return sorted(unrepeated_list(found), key=itemgetter(2))