Exemplo n.º 1
0
 def __deserialize_item(self, stream: Stream) -> 'MorphBaseInfo':
     from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
     ty = stream.readbyte()
     res = (MorphBaseInfo() if ty == 0 else MorphWordForm())
     res.class0_ = MorphClass._new53(
         SerializerHelper.deserialize_short(stream))
     res.case_ = MorphCase._new29(
         SerializerHelper.deserialize_short(stream))
     res.gender = Utils.valToEnum(
         SerializerHelper.deserialize_short(stream), MorphGender)
     res.number = Utils.valToEnum(
         SerializerHelper.deserialize_short(stream), MorphNumber)
     res.language = MorphLang._new56(
         SerializerHelper.deserialize_short(stream))
     if (ty == 0):
         return res
     wf = Utils.asObjectOrNull(res, MorphWordForm)
     wf.normal_case = SerializerHelper.deserialize_string(stream)
     wf.normal_full = SerializerHelper.deserialize_string(stream)
     wf.undef_coef = SerializerHelper.deserialize_short(stream)
     cou = SerializerHelper.deserialize_int(stream)
     i = 0
     while i < cou:
         if (wf.misc is None):
             wf.misc = MorphMiscInfo()
         wf.misc.attrs.append(SerializerHelper.deserialize_string(stream))
         i += 1
     return res
Exemplo n.º 2
0
 def __init__(self, source: 'MorphCollection' = None) -> None:
     super().__init__()
     self.__m_class = MorphClass()
     self.__m_gender = MorphGender.UNDEFINED
     self.__m_number = MorphNumber.UNDEFINED
     self.__m_case = MorphCase()
     self.__m_language = MorphLang()
     self.__m_voice = MorphVoice.UNDEFINED
     self.__m_need_recalc = True
     self.__m_items = None
     if (source is None):
         return
     for it in source.items:
         mi = None
         if (isinstance(it, MorphWordForm)):
             wf = MorphWordForm()
             wf.copy_from_word_form(Utils.asObjectOrNull(it, MorphWordForm))
             mi = (wf)
         else:
             mi = MorphBaseInfo()
             mi.copy_from(it)
         if (self.__m_items is None):
             self.__m_items = list()
         self.__m_items.append(mi)
     self.__m_class = MorphClass._new53(source.__m_class.value)
     self.__m_gender = source.__m_gender
     self.__m_case = MorphCase._new29(source.__m_case.value)
     self.__m_number = source.__m_number
     self.__m_language = MorphLang._new56(source.__m_language.value)
     self.__m_voice = source.__m_voice
     self.__m_need_recalc = False
Exemplo n.º 3
0
 def clone(self) -> 'MorphCollection':
     """ Создать копию
     
     """
     res = MorphCollection()
     if (self.__m_items is not None):
         res.__m_items = list()
         try:
             res.__m_items.extend(self.__m_items)
         except Exception as ex:
             pass
     if (not self.__m_need_recalc):
         res.__m_class = MorphClass._new53(self.__m_class.value)
         res.__m_gender = self.__m_gender
         res.__m_case = MorphCase._new29(self.__m_case.value)
         res.__m_number = self.__m_number
         res.__m_language = MorphLang._new56(self.__m_language.value)
         res.__m_need_recalc = False
         res.__m_voice = self.__m_voice
     return res
Exemplo n.º 4
0
 def _deserialize(self, stream: Stream) -> None:
     from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
     self.__m_class = MorphClass._new53(
         SerializerHelper.deserialize_short(stream))
     self.__m_case = MorphCase._new29(
         SerializerHelper.deserialize_short(stream))
     self.__m_gender = (Utils.valToEnum(
         SerializerHelper.deserialize_short(stream), MorphGender))
     self.__m_number = (Utils.valToEnum(
         SerializerHelper.deserialize_short(stream), MorphNumber))
     self.__m_voice = (Utils.valToEnum(
         SerializerHelper.deserialize_short(stream), MorphVoice))
     self.__m_language = MorphLang._new56(
         SerializerHelper.deserialize_short(stream))
     cou = SerializerHelper.deserialize_int(stream)
     self.__m_items = list()
     i = 0
     while i < cou:
         it = self.__deserialize_item(stream)
         if (it is not None):
             self.__m_items.append(it)
         i += 1
     self.__m_need_recalc = False
Exemplo n.º 5
0
 def get_normal_case_text(self,
                          mc: 'MorphClass' = None,
                          num: 'MorphNumber' = MorphNumber.UNDEFINED,
                          gender: 'MorphGender' = MorphGender.UNDEFINED,
                          keep_chars: bool = False) -> str:
     if ((isinstance(self.begin_token, ReferentToken))
             and self.begin_token == self.end_token):
         return self.begin_token.get_normal_case_text(
             mc, num, gender, keep_chars)
     res = None
     max_coef = 0
     def_coef = -1
     for it in self.morph.items:
         v = Utils.asObjectOrNull(it, NounPhraseItemTextVar)
         if (v is None):
             continue
         if (v.undef_coef > 0
                 and (((v.undef_coef < max_coef) or def_coef >= 0))):
             continue
         if (num == MorphNumber.SINGULAR
                 and v.single_number_value is not None):
             if (mc is not None and ((gender == MorphGender.NEUTER
                                      or gender == MorphGender.FEMINIE))
                     and mc.is_adjective):
                 bi = MorphBaseInfo._new401(MorphClass._new53(mc.value),
                                            gender, MorphNumber.SINGULAR,
                                            MorphCase.NOMINATIVE,
                                            self.morph.language)
                 str0_ = MorphologyService.get_wordform(
                     v.single_number_value, bi)
                 if (str0_ is not None):
                     res = str0_
             else:
                 res = v.single_number_value
             if (v.undef_coef == 0):
                 break
             max_coef = v.undef_coef
             continue
         if (Utils.isNullOrEmpty(v.normal_value)):
             continue
         if (str.isdigit(v.normal_value[0]) and mc is not None
                 and mc.is_adjective):
             val = 0
             wrapval402 = RefOutArgWrapper(0)
             inoutres403 = Utils.tryParseInt(v.normal_value, wrapval402)
             val = wrapval402.value
             if (inoutres403):
                 str0_ = NumberHelper.get_number_adjective(
                     val, gender,
                     (MorphNumber.SINGULAR if num == MorphNumber.SINGULAR
                      or val == 1 else MorphNumber.PLURAL))
                 if (str0_ is not None):
                     res = str0_
                     if (v.undef_coef == 0):
                         break
                     max_coef = v.undef_coef
                     continue
         res1 = it.normal_value
         if (num == MorphNumber.SINGULAR):
             if (res1 == "ДЕТИ"):
                 res1 = "РЕБЕНОК"
             elif (res1 == "ЛЮДИ"):
                 res1 = "ЧЕЛОВЕК"
         max_coef = v.undef_coef
         if (v.undef_coef > 0):
             res = res1
             continue
         def_co = 0
         if (mc is not None and mc.is_adjective and v.undef_coef == 0):
             pass
         elif (
             ((isinstance(self.begin_token, TextToken))
              and res1 == self.begin_token.term and it.case_.is_nominative)
                 and it.number == MorphNumber.SINGULAR):
             def_co = 1
         if (num == MorphNumber.PLURAL and
             ((v.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)):
             def_co += 3
         if (res is None or def_co > def_coef):
             res = res1
             def_coef = def_co
             if (def_co > 0):
                 break
     if (res is not None):
         return self.__corr_chars(res, keep_chars)
     if (res is None and self.begin_token == self.end_token):
         res = self.begin_token.get_normal_case_text(
             mc, num, gender, keep_chars)
     elif (res is None):
         res = self.begin_token.get_normal_case_text(
             mc, num, gender, keep_chars)
         if (res is None):
             res = MiscHelper.get_text_value_of_meta_token(
                 self, (GetTextAttr.KEEPREGISTER
                        if keep_chars else GetTextAttr.NO))
         else:
             res = "{0} {1}".format(
                 res,
                 MiscHelper.get_text_value(
                     self.begin_token.next0_, self.end_token,
                     (GetTextAttr.KEEPREGISTER
                      if keep_chars else GetTextAttr.NO)))
     return Utils.ifNotNull(res, "?")
Exemplo n.º 6
0
 def get_normal_case_text(self,
                          mc: 'MorphClass' = None,
                          num: 'MorphNumber' = MorphNumber.UNDEFINED,
                          gender: 'MorphGender' = MorphGender.UNDEFINED,
                          keep_chars: bool = False) -> str:
     from pullenti.ner.core.MiscHelper import MiscHelper
     empty = True
     if (mc is not None and mc.is_preposition):
         return LanguageHelper.normalize_preposition(self.term)
     for it in self.morph.items:
         if (mc is not None and not mc.is_undefined):
             cc = (it.class0_) & mc
             if (cc.is_undefined):
                 continue
             if (cc.is_misc and not cc.is_proper and mc != it.class0_):
                 continue
         wf = Utils.asObjectOrNull(it, MorphWordForm)
         normal_full = False
         if (gender != MorphGender.UNDEFINED):
             if (((it.gender) & (gender)) == (MorphGender.UNDEFINED)):
                 if ((gender == MorphGender.MASCULINE and
                      ((it.gender != MorphGender.UNDEFINED or it.number
                        == MorphNumber.PLURAL)) and wf is not None)
                         and wf.normal_full is not None):
                     normal_full = True
                 elif (gender == MorphGender.MASCULINE
                       and it.class0_.is_personal_pronoun):
                     pass
                 else:
                     continue
         if (not it.case_.is_undefined):
             empty = False
         if (wf is not None):
             res = None
             if (num == MorphNumber.SINGULAR
                     and it.number == MorphNumber.PLURAL
                     and wf.normal_full is not None):
                 le = len(wf.normal_case)
                 if ((le == (len(wf.normal_full) + 2) and le > 4
                      and wf.normal_case[le - 2] == 'С')
                         and wf.normal_case[le - 1] == 'Я'):
                     res = wf.normal_case
                 else:
                     res = (wf.normal_full
                            if normal_full else wf.normal_full)
             else:
                 res = (wf.normal_full if normal_full else
                        (Utils.ifNotNull(wf.normal_case, self.term)))
             if (num == MorphNumber.SINGULAR and mc is not None
                     and mc == MorphClass.NOUN):
                 if (res == "ДЕТИ"):
                     res = "РЕБЕНОК"
             if (keep_chars):
                 if (self.chars.is_all_lower):
                     res = res.lower()
                 elif (self.chars.is_capital_upper):
                     res = MiscHelper.convert_first_char_upper_and_other_lower(
                         res)
             return res
     if (not empty):
         return None
     te = None
     if (num == MorphNumber.SINGULAR and mc is not None):
         bi = MorphBaseInfo._new492(MorphClass._new53(mc.value), gender,
                                    MorphNumber.SINGULAR,
                                    self.morph.language)
         vars0_ = MorphologyService.get_wordform(self.term, bi)
         if (vars0_ is not None):
             te = vars0_
     if (te is None):
         te = self.term
     if (keep_chars):
         if (self.chars.is_all_lower):
             return te.lower()
         elif (self.chars.is_capital_upper):
             return MiscHelper.convert_first_char_upper_and_other_lower(te)
     return te