Exemplo n.º 1
0
 def __processProperVariants(self, word : str, res : typing.List['MorphWordForm'], geo : bool) -> None:
     """ Append proper-name word forms derived from the ending of the word.
     
     The reversed tree is walked from the last character of the word towards the
     first one, and every visited node that carries reverse variants is
     remembered. The remembered nodes are then tried from the deepest one
     backwards; the first node that offers at least one variant of the requested
     kind ends the search.
     
     Args:
         word(str): the word whose ending is looked up in the reversed tree
         res(typing.List[MorphWordForm]): list that receives the new forms (modified in place)
         geo(bool): True to take variants flagged is_proper_geo, False to take
             variants flagged is_proper_surname
     
     """
     tn = self.m_root_reverce
     # process() treats a missing reversed tree as a valid state, so do the same
     # here instead of failing on the first attribute access.
     if (tn is None):
         return
     nodes_with_vars = []
     for letter in reversed(word):
         if (tn.lazy_pos > 0):
             self.__loadTreeNode(tn)
         if (tn.nodes is None):
             break
         wrapnext = RefOutArgWrapper(None)
         if (not Utils.tryGetValue(tn.nodes, ord(letter), wrapnext)):
             break
         tn = wrapnext.value
         if (tn.lazy_pos > 0):
             self.__loadTreeNode(tn)
         if (tn.reverce_variants is not None):
             nodes_with_vars.append(tn)
     if (not nodes_with_vars):
         return
     # Deepest node first: it corresponds to the longest matched ending.
     for tn in reversed(nodes_with_vars):
         if (tn.lazy_pos > 0):
             self.__loadTreeNode(tn)
         ok = False
         for v in tn.reverce_variants:
             wanted = v.class0_.is_proper_geo if geo else v.class0_.is_proper_surname
             if (not wanted):
                 continue
             r = MorphWordForm(v, word)
             if (not MorphWordForm._hasMorphEquals(res, r)):
                 r.undef_coef = v.coef
                 res.append(r)
             # A suitable variant stops the search even when its form was already in res.
             ok = True
         if (ok):
             break
Exemplo n.º 2
0
 def processResult(self, res: typing.List['MorphWordForm'], word_begin: str,
                   mvs: typing.List['MorphRuleVariant']) -> None:
     """ Turn rule variants into word forms and append the new ones to res.

     For every variant a MorphWordForm is created and its normal forms are
     built from word_begin plus the variant tails. A tail that is empty or
     starts with '-' leaves just word_begin. normal_full is only assigned
     when the variant has a full_normal_tail at all.

     Args:
         res: list that receives the forms (modified in place); a form is
             skipped when MorphWordForm._hasMorphEquals reports it for res
         word_begin(str): the leading part of the word the tails are attached to
         mvs: the rule variants to convert

     """
     for variant in mvs:
         form = MorphWordForm(variant, None)

         tail = variant.normal_tail
         has_tail = tail is not None and len(tail) > 0 and tail[0] != '-'
         form.normal_case = (word_begin + tail) if has_tail else word_begin

         full_tail = variant.full_normal_tail
         if full_tail is not None:
             has_full_tail = len(full_tail) > 0 and full_tail[0] != '-'
             form.normal_full = (word_begin + full_tail) if has_full_tail else word_begin

         if MorphWordForm._hasMorphEquals(res, form):
             continue
         # Forms produced here get a zero undef_coef.
         form.undef_coef = 0
         res.append(form)
Exemplo n.º 3
0
 def process(self, word : str) -> typing.List['MorphWordForm']:
     """ Process a single word and return its morphological word forms.
     
     The word is first matched against the rules found along the prefix tree
     (m_root). Unless the collected forms rule it out (see the checks that
     clear need_test_unknown_vars), extra forms are then derived from the
     ending of the word via the reversed tree (m_root_reverce).
     
     Args:
         word(str): the word to analyse; it must be in upper case
     
     Returns:
         typing.List[MorphWordForm]: the word forms after MorphEngine.__sort and
         normalisation, or None when the word is empty, when a word longer than
         one character contains no vowel, or when no form was found. When the
         reversed tree is needed but missing, the forms collected so far are
         returned as they are (possibly None or an empty list).
     
     """
     if (Utils.isNullOrEmpty(word)):
         return None
     # A word longer than one character must contain at least one vowel.
     if (len(word) > 1 and not any(LanguageHelper.isCyrillicVowel(ch) or LanguageHelper.isLatinVowel(ch) for ch in word)):
         return None
     res = None
     tn = self.m_root
     i = 0
     while i <= len(word):
         if (tn.lazy_pos > 0):
             self.__loadTreeNode(tn)
         if (tn.rules is not None):
             # The node at depth i splits the word into a beginning and an ending.
             word_begin = word[:i]
             word_end = word[i:]
             # res becomes a list as soon as any node with rules is seen, even if nothing matches.
             if (res is None):
                 res = list()
             for r in tn.rules:
                 wrapmvs = RefOutArgWrapper(None)
                 if (Utils.tryGetValue(r.variants, word_end, wrapmvs)):
                     r.processResult(res, word_begin, wrapmvs.value)
         if (tn.nodes is None or i >= len(word)):
             break
         wraptn = RefOutArgWrapper(None)
         if (not Utils.tryGetValue(tn.nodes, ord(word[i]), wraptn)):
             break
         tn = wraptn.value
         i += 1
     # Decide whether forms guessed from the ending should be added as well.
     need_test_unknown_vars = True
     if (res is not None):
         for r in res:
             if ((r.class0_.is_pronoun or r.class0_.is_noun or r.class0_.is_adjective) or (r.class0_.is_misc and r.class0_.is_conjunction) or r.class0_.is_preposition):
                 need_test_unknown_vars = False
             elif (r.class0_.is_adverb and r.normal_case is not None):
                 if (not LanguageHelper.endsWithEx(r.normal_case, "О", "А", None, None)):
                     need_test_unknown_vars = False
                 elif (r.normal_case == "МНОГО"):
                     need_test_unknown_vars = False
             elif (r.class0_.is_verb and len(res) > 1):
                 # A verb only counts when some other form has a different class.
                 ok = False
                 for rr in res:
                     if (rr != r and rr.class0_ != r.class0_):
                         ok = True
                         break
                 if (ok and not LanguageHelper.endsWith(word, "ИМ")):
                     need_test_unknown_vars = False
     if (need_test_unknown_vars and LanguageHelper.isCyrillicChar(word[0])):
         # Require at least two Cyrillic vowels and two other characters.
         gl = sum(1 for ch in word if LanguageHelper.isCyrillicVowel(ch))
         sog = len(word) - gl
         if ((gl < 2) or (sog < 2)):
             need_test_unknown_vars = False
     if (need_test_unknown_vars and res is not None and len(res) == 1):
         # The attrs keys below are dictionary tags; their linguistic meaning is not
         # visible here (presumably tense / aspect / voice markers — TODO confirm).
         if (res[0].class0_.is_verb):
             if ("н.вр." in res[0].misc.attrs and "нес.в." in res[0].misc.attrs and not "страд.з." in res[0].misc.attrs):
                 need_test_unknown_vars = False
             elif ("б.вр." in res[0].misc.attrs and "сов.в." in res[0].misc.attrs):
                 need_test_unknown_vars = False
             elif (res[0].normal_case is not None and LanguageHelper.endsWith(res[0].normal_case, "СЯ")):
                 need_test_unknown_vars = False
         if (res[0].class0_.is_undefined and "прдктв." in res[0].misc.attrs):
             need_test_unknown_vars = False
     if (need_test_unknown_vars):
         if (self.m_root_reverce is None):
             # NOTE(review): this early return skips the sorting and normalisation done below.
             return res
         tn = self.m_root_reverce
         tn0 = None
         stop = -1
         # Walk the reversed tree from the last character to the first node with variants.
         for k in range(len(word) - 1, -1, -1):
             if (tn.lazy_pos > 0):
                 self.__loadTreeNode(tn)
             if (tn.nodes is None):
                 break
             wrapnext = RefOutArgWrapper(None)
             if (not Utils.tryGetValue(tn.nodes, ord(word[k]), wrapnext)):
                 break
             tn = wrapnext.value
             if (tn.lazy_pos > 0):
                 self.__loadTreeNode(tn)
             if (tn.reverce_variants is not None):
                 tn0 = tn
                 stop = k
                 break
         if (tn0 is not None):
             # Accept the match when it stopped before index 4, or when the word up to
             # and including the stop index contains a vowel.
             glas = stop < 4 or any(LanguageHelper.isCyrillicVowel(ch) or LanguageHelper.isLatinVowel(ch) for ch in word[:stop + 1])
             if (glas):
                 # res is still None when no tree node carried rules; create the list
                 # up front so the loops below never iterate over None.
                 if (res is None):
                     res = list()
                 for mv in tn0.reverce_variants:
                     if (((not mv.class0_.is_verb and not mv.class0_.is_adjective and not mv.class0_.is_noun) and not mv.class0_.is_proper_surname and not mv.class0_.is_proper_geo) and not mv.class0_.is_proper_secname):
                         continue
                     # Skip the guess when a dictionary form already covers it.
                     ok = False
                     for rr in res:
                         if (rr.is_in_dictionary):
                             if (rr.class0_ == mv.class0_ or rr.class0_.is_noun):
                                 ok = True
                                 break
                             if (not mv.class0_.is_adjective and rr.class0_.is_verb):
                                 ok = True
                                 break
                     if (ok):
                         continue
                     if (len(mv.tail) > 0 and not LanguageHelper.endsWith(word, mv.tail)):
                         continue
                     r = MorphWordForm(mv, word)
                     if (not MorphWordForm._hasMorphEquals(res, r)):
                         r.undef_coef = mv.coef
                         res.append(r)
     if (word == "ПРИ" and res is not None):
         # Special case: this word never keeps forms flagged is_proper_geo.
         res = [r for r in res if not r.class0_.is_proper_geo]
     if (res is None or len(res) == 0):
         return None
     MorphEngine.__sort(res, word)
     for v in res:
         if (v.normal_case is None):
             v.normal_case = word
         if (v.class0_.is_verb):
             if (v.normal_full is None and LanguageHelper.endsWith(v.normal_case, "ТЬСЯ")):
                 # Drop the last two characters of the normal form.
                 v.normal_full = v.normal_case[:len(v.normal_case) - 2]
         v.language = self.language
         if (v.class0_.is_preposition):
             v.normal_case = LanguageHelper.normalizePreposition(v.normal_case)
     mc = MorphClass()
     # Backwards so that deleting an item does not shift the indices still to visit.
     for i in range(len(res) - 1, -1, -1):
         if (not res[i].is_in_dictionary and res[i].class0_.is_adjective and len(res) > 1):
             if ("к.ф." in res[i].misc.attrs or "неизм." in res[i].misc.attrs):
                 del res[i]
                 continue
         if (res[i].is_in_dictionary):
             # Accumulate the classes of all dictionary forms.
             mc.value |= res[i].class0_.value
     if (mc == MorphClass.VERB and len(res) > 1):
         for r in res:
             if (r.undef_coef > (100) and r.class0_ == MorphClass.ADJECTIVE):
                 r.undef_coef = (0)
     if (len(res) == 0):
         return None
     return res
Exemplo n.º 4
0
 def getAllWordforms(self, word : str) -> typing.List['MorphWordForm']:
     """ Collect the word forms of every rule that matches the given word.
     
     The prefix tree is walked along the word. At each node that has rules, the
     rest of the word is looked up in each rule; on a hit, all variants of that
     rule become word forms whose normal_case is the walked prefix plus the
     variant tail. Afterwards forms that differ only in case are merged, then
     forms that differ only in gender; forms for which containsAttr("инф.", None)
     is true take no part in merging.
     
     Args:
         word(str): the word to look up
     
     Returns:
         typing.List[MorphWordForm]: the collected forms (empty when nothing matched)
     
     """
     forms = list()
     node = self.m_root
     total = len(word)
     pos = 0
     while pos <= total:
         if (node.lazy_pos > 0):
             self.__loadTreeNode(node)
         if (node.rules is not None):
             prefix = word[:pos]
             suffix = word[pos:]
             for rule in node.rules:
                 if (suffix not in rule.variants):
                     continue
                 for group in rule.variants_list:
                     for variant in group:
                         form = MorphWordForm(variant, None)
                         if (MorphWordForm._hasMorphEquals(forms, form)):
                             continue
                         form.normal_case = (prefix + variant.tail)
                         form.undef_coef = 0
                         forms.append(form)
         if (node.nodes is None or pos >= total):
             break
         slot = RefOutArgWrapper(None)
         found = Utils.tryGetValue(node.nodes, ord(word[pos]), slot)
         node = slot.value
         if (not found):
             break
         pos += 1
     # First merge: same class, gender, number and normal form -> union of cases.
     idx = 0
     while idx < len(forms):
         base = forms[idx]
         if (not base.containsAttr("инф.", None)):
             nxt = idx + 1
             while nxt < len(forms):
                 other = forms[nxt]
                 if (not other.containsAttr("инф.", None)
                         and base.class0_ == other.class0_ and base.gender == other.gender
                         and base.number == other.number and base.normal_case == other.normal_case):
                     base.case_ = (base.case_) | other.case_
                     # The next candidate slides into this slot, so nxt stays put.
                     del forms[nxt]
                 else:
                     nxt += 1
         idx += 1
     # Second merge: same class, case, number and normal form -> union of genders.
     idx = 0
     while idx < len(forms):
         base = forms[idx]
         if (not base.containsAttr("инф.", None)):
             nxt = idx + 1
             while nxt < len(forms):
                 other = forms[nxt]
                 if (not other.containsAttr("инф.", None)
                         and base.class0_ == other.class0_ and base.case_ == other.case_
                         and base.number == other.number and base.normal_case == other.normal_case):
                     base.gender = Utils.valToEnum((base.gender) | (other.gender), MorphGender)
                     del forms[nxt]
                 else:
                     nxt += 1
         idx += 1
     return forms