Пример #1
0
 def __str__(self) -> str:
     """Render the item as ``"<typ>: <value> <alt_value><flag>"``.

     ``self.value`` is passed through ``Utils.ifNotNull`` with
     ``str(self.ref)`` (or ``""`` when ``ref`` is ``None``) as its fallback,
     ``self.alt_value`` with ``""`` as its fallback, and ``"[int]"`` is
     appended only when ``self.is_internal`` is truthy.
     """
     typ_text = Utils.enumToString(self.typ)
     ref_text = "" if self.ref is None else str(self.ref)
     main_text = Utils.ifNotNull(self.value, ref_text)
     alt_text = Utils.ifNotNull(self.alt_value, "")
     internal_mark = "[int]" if self.is_internal else ""
     return "{0}: {1} {2}{3}".format(typ_text, main_text, alt_text,
                                     internal_mark)
Пример #2
0
 def to_string(self,
               short_variant: bool,
               lang: 'MorphLang' = None,
               lev: int = 0) -> str:
     """Format the referent as ``"[<kind>] <termin> (<misc>) = <value>"``.

     The parenthesised part comes from ``ATTR_TERMIN_ADD`` or, when that is
     ``None``, from ``ATTR_MISC``; it is omitted when both are ``None``.
     ``termin`` and ``value`` use ``"?"`` as the ``Utils.ifNotNull`` fallback.

     Args:
         short_variant: not used by this implementation.
         lang: not used by this implementation.
         lev: not used by this implementation.

     Returns:
         The formatted string.
     """
     extra = self.get_string_value(DefinitionReferent.ATTR_TERMIN_ADD)
     if extra is None:
         extra = self.get_string_value(DefinitionReferent.ATTR_MISC)
     kind_text = Utils.enumToString(self.kind)
     termin_text = Utils.ifNotNull(self.termin, "?")
     if extra is None:
         extra_text = ""
     else:
         extra_text = " ({0})".format(extra)
     value_text = Utils.ifNotNull(self.value, "?")
     return "[{0}] {1}{2} = {3}".format(kind_text, termin_text, extra_text,
                                        value_text)
Пример #3
0
 def __str__(self) -> str:
     """Render the normal-case text and morphology of this token.

     The output is ``"<text> <morph>"``, extended with
     ``" / <internal_noun>"`` when ``self.internal_noun`` is set.  ``"?"``
     is the ``Utils.ifNotNull`` fallback for the normal-case text.
     """
     inner = self.internal_noun
     text = Utils.ifNotNull(
         self.getNormalCaseText(None, False, MorphGender.UNDEFINED, False),
         "?")
     morph_text = str(self.morph)
     if inner is not None:
         return "{0} {1} / {2}".format(text, morph_text,
                                       str(self.internal_noun))
     return "{0} {1}".format(text, morph_text)
Пример #4
0
 def get_normal_case_text_without_adjective(self, adj_index : int) -> str:
     """Build the phrase text while skipping one adjective.

     Every adjective except the one at ``adj_index`` is written as its
     normal-case text followed by a space (``"?"`` is the fallback), then
     the noun is appended.  The noun is first requested as NOUN|PRONOUN; if
     that yields ``None`` it is requested again without a class filter, and
     ``str(self.noun)`` is the final fallback.

     Args:
         adj_index: index in ``self.adjectives`` of the adjective to omit.

     Returns:
         The content of the accumulated buffer, via
         ``Utils.toStringStringIO``.
     """
     out = io.StringIO()
     for idx, adjective in enumerate(self.adjectives):
         if idx == adj_index:
             continue
         adj_text = adjective.get_normal_case_text(
             (MorphClass.ADJECTIVE) | MorphClass.PRONOUN,
             MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
         out.write("{0} ".format(Utils.ifNotNull(adj_text, "?")))
     noun_text = self.noun.get_normal_case_text(
         (MorphClass.NOUN) | MorphClass.PRONOUN, MorphNumber.UNDEFINED,
         MorphGender.UNDEFINED, False)
     if noun_text is None:
         # Second attempt without restricting the morphological class.
         noun_text = self.noun.get_normal_case_text(
             None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
     out.write(str(Utils.ifNotNull(noun_text, str(self.noun))))
     return Utils.toStringStringIO(out)
Пример #5
0
 def deserialize_derivate_group(str0_: 'ByteArrayWrapper',
                                dg: 'DerivateGroup', pos: int) -> None:
     """Fill ``dg`` from the serialized data read through ``str0_``.

     Read order: a flag word, two ``NextModelQuestion`` values, the prefix
     string, a counted list of words, then two counted lists of
     (prefix, case) pairs stored in ``dg.nexts`` and ``dg.nexts_ref``.

     Args:
         str0_: reader providing ``deserialize_short`` / ``deserialize_string``.
         dg: group object that receives the values.
         pos: position argument forwarded unchanged to every read call.
     """
     flags = str0_.deserialize_short(pos)
     if flags & 1:
         dg.is_dummy = True
     if flags & 2:
         dg.not_generate = True
     # Bits are tested in ascending order, so when several bits of the same
     # group are set the highest one determines the stored value.
     for mask, transitive in ((4, 0), (8, 1)):
         if flags & mask:
             dg.m_transitive = transitive
     for mask, agent_case in ((0x10, 0), (0x20, 1), (0x40, 2)):
         if flags & mask:
             dg.m_rev_agent_case = agent_case
     dg.questions = Utils.valToEnum(str0_.deserialize_short(pos),
                                    NextModelQuestion)
     dg.questions_ref = Utils.valToEnum(str0_.deserialize_short(pos),
                                        NextModelQuestion)
     dg.prefix = str0_.deserialize_string(pos)

     for _ in range(str0_.deserialize_short(pos)):
         word = DerivateWord(dg)
         word.spelling = str0_.deserialize_string(pos)
         word.class0_ = MorphClass()
         word.class0_.value = str0_.deserialize_short(pos)
         word.lang = MorphLang._new10(str0_.deserialize_short(pos))
         word.attrs.value = str0_.deserialize_short(pos)
         dg.words.append(word)

     def read_case_entry():
         # One (prefix, case) pair; a missing prefix is replaced by "".
         key = Utils.ifNotNull(str0_.deserialize_string(pos), "")
         case = MorphCase()
         case.value = str0_.deserialize_short(pos)
         return key, case

     for _ in range(str0_.deserialize_short(pos)):
         key, case = read_case_entry()
         if dg.nexts is None:
             dg.nexts = dict()
         dg.nexts[key] = case

     for _ in range(str0_.deserialize_short(pos)):
         key, case = read_case_entry()
         if dg.nexts_ref is None:
             dg.nexts_ref = dict()
         dg.nexts_ref[key] = case
Пример #6
0
 def _mergeSlots2(self, obj : 'Referent', lang : 'MorphLang') -> None:
     """Copy the slots of ``obj`` into this referent, then tidy up.

     Steps, in order:
       1. Copy every slot of ``obj``.  NAME/TYPE slots are skipped when the
          script of their first character does not match ``lang.is_en``
          (Latin for English, non-Latin otherwise) or when the value ends
          with " ССР".
       2. If this referent still has no NAME (or no TYPE) slot while ``obj``
          has one, copy all of ``obj``'s slots of that type unfiltered.
       3. Remove type slots made redundant by ``is_territory``,
          ``is_state`` and ``is_city``.
       4. Keep only the first ``ATTR_HIGHER`` slot.
       5. Delegate to ``_mergeExtReferents``.

     Args:
         obj: referent whose slots are merged in.
         lang: language used for the NAME/TYPE filter in step 1.
     """
     filtered_types = (GeoReferent.ATTR_NAME, GeoReferent.ATTR_TYPE)
     for slot in obj.slots:
         if slot.type_name in filtered_types:
             text = slot.value
             latin = LanguageHelper.isLatinChar(text[0])
             if latin and not lang.is_en:
                 continue
             if not latin and lang.is_en:
                 continue
             if LanguageHelper.endsWith(text, " ССР"):
                 continue
         self.addSlot(slot.type_name, slot.value, False, slot.count)

     # The filter above may have rejected every name/type; copy them all then.
     for attr in filtered_types:
         missing_here = self.findSlot(attr, None, True) is None
         if missing_here and obj.findSlot(attr, None, True) is not None:
             for slot in obj.slots:
                 if slot.type_name == attr:
                     self.addSlot(slot.type_name, slot.value, False,
                                  slot.count)

     if self.is_territory:
         state_like = self.alpha2 is not None or any(
             self.findSlot(GeoReferent.ATTR_TYPE, word, True) is not None
             for word in ("государство", "держава", "империя", "імперія",
                          "state"))
         if state_like:
             territory = self.findSlot(GeoReferent.ATTR_TYPE, "территория",
                                       True)
             if territory is not None:
                 self.slots.remove(territory)

     if self.is_state:
         for slot in self.slots:
             if (slot.type_name == GeoReferent.ATTR_TYPE
                     and str(slot.value) in ("регион", "регіон", "region")):
                 # Only the first match is removed; leaving the loop right
                 # away keeps the mutation during iteration harmless.
                 self.slots.remove(slot)
                 break

     if self.is_city:
         generic_city = Utils.ifNotNull(
             self.findSlot(GeoReferent.ATTR_TYPE, "город", True),
             Utils.ifNotNull(
                 self.findSlot(GeoReferent.ATTR_TYPE, "місто", True),
                 self.findSlot(GeoReferent.ATTR_TYPE, "city", True)))
         if generic_city is not None:
             for other in self.slots:
                 if (other.type_name == GeoReferent.ATTR_TYPE
                         and other != generic_city
                         and GeoReferent.__isCity(other.value)):
                     # Another city-type slot exists: drop the generic one.
                     self.slots.remove(generic_city)
                     break

     higher_seen = False
     idx = 0
     while idx < len(self.slots):
         if self.slots[idx].type_name == GeoReferent.ATTR_HIGHER:
             if higher_seen:
                 del self.slots[idx]
                 continue
             higher_seen = True
         idx += 1

     self._mergeExtReferents(obj)
Пример #7
0
 def __calc_actant(self) -> float:
     """Compute the coefficient for this link and store it in ``self.coef``.

     Returns:
         ``-1`` when ``can_be_participle`` is set or the verb has no
         ``verb_morph``; ``0`` when ``from_prep`` is ``None`` or the source
         morphology case is undefined for a matching group;
         ``SemanticService.PARAMS.next_model`` (halved once for an empty
         preposition, and once more for the nominative case) when a group's
         case for ``from_prep`` intersects the source case; ``0.1`` otherwise.
     """
     if self.can_be_participle:
         self.coef = -1
         return self.coef
     verb_form = self.to_verb.last_verb.verb_morph
     if verb_form is None:
         # Unlike every other exit, this one does not assign self.coef.
         return -1
     if self.from_prep is None:
         self.coef = 0
         return self.coef
     src_morph = self.from0_.source.source.morph
     groups = DerivateService.find_derivates(
         Utils.ifNotNull(verb_form.normal_full, verb_form.normal_case), True,
         None)
     if groups is None:
         groups = ()
     for group in groups:
         if group.cm.nexts is None or self.from_prep not in group.cm.nexts:
             continue
         case = group.cm.nexts[self.from_prep]
         if not (case & src_morph.case_).is_undefined:
             self.coef = SemanticService.PARAMS.next_model
             if Utils.isNullOrEmpty(self.from_prep):
                 if src_morph.case_.is_nominative:
                     self.coef /= 2
                 self.coef /= 2
             return self.coef
         if self.from0_.source.source.morph.case_.is_undefined:
             self.coef = 0
             return self.coef
     self.coef = 0.1
     return self.coef
 def __serialize_morph_tree_node(res: io.IOBase,
                                 tn: 'MorphTreeNode') -> None:
     """Write ``tn`` and, recursively, its child nodes to ``res``.

     Layout written, in order: the rule ids followed by a ``0`` short; for
     each reverse variant its tail, rule id (``0`` when it has no rule),
     coefficient and the variant itself, followed by a ``None`` string; for
     each child its key, a 4-byte slot, and the child subtree; finally the
     short ``0xFFFF``.

     Args:
         res: seekable stream being written.
         tn: node to serialize.
     """
     if tn.rules is not None:
         for rule in tn.rules:
             MorphSerializeHelper.__serialize_short(res, rule._id0_)
     MorphSerializeHelper.__serialize_short(res, 0)

     if tn.reverce_variants is not None:
         for variant in tn.reverce_variants:
             MorphSerializeHelper.__serialize_string(
                 res, Utils.ifNotNull(variant.tail, ""))
             rule_id = 0 if variant.rule is None else variant.rule._id0_
             MorphSerializeHelper.__serialize_short(res, rule_id)
             MorphSerializeHelper.__serialize_short(res, variant.coef)
             MorphSerializeHelper.__serialize_morph_rule_variant(res, variant)
     MorphSerializeHelper.__serialize_string(res, None)

     if tn.nodes is not None:
         for key, child in tn.nodes.items():
             MorphSerializeHelper.__serialize_short(res, key)
             # Reserve an int, write the subtree, then go back and store the
             # stream position reached after the subtree in the reserved slot.
             slot_pos = res.tell()
             MorphSerializeHelper.__serialize_int(res, 0)
             MorphSerializeHelper.__serialize_morph_tree_node(res, child)
             end_pos = res.tell()
             res.seek(slot_pos, io.SEEK_SET)
             MorphSerializeHelper.__serialize_int(res, end_pos)
             res.seek(end_pos, io.SEEK_SET)
     MorphSerializeHelper.__serialize_short(res, 0xFFFF)
Пример #9
0
 def try_attach_by_referent(self, referent : 'Referent', item : 'IntOntologyItem'=None, must_be_single : bool=False) -> typing.List['Referent']:
     """Find referents attached to this ontology that may equal ``referent``.

     Args:
         referent: referent to look up; ``None`` yields ``None``.
         item: ontology item to search with; when ``None`` it is obtained
             from ``referent.create_ontology_item()``.
         must_be_single: when true and several candidates were found, every
             pair must pass ``can_be_equals(..., FORMERGING)``, otherwise
             ``None`` is returned.

     Returns:
         List of distinct matching referents, or ``None`` when nothing
         matched (the list is never returned empty).
     """
     if referent is None:
         return None
     if item is None:
         item = referent.create_ontology_item()
         if item is None:
             return None
     found_items = self.try_attach_by_item(item)
     if found_items is None:
         return None

     matches = None
     for onto_item in found_items:
         candidate = Utils.ifNotNull(
             onto_item.referent,
             Utils.asObjectOrNull(onto_item.tag, Referent))
         if candidate is None:
             continue
         if not referent.can_be_equals(candidate,
                                       ReferentsEqualType.WITHINONETEXT):
             continue
         if matches is None:
             matches = list()
         if candidate not in matches:
             matches.append(candidate)

     if must_be_single and matches is not None and len(matches) > 1:
         for pos, first in enumerate(matches):
             for second in matches[pos + 1:]:
                 if not first.can_be_equals(second,
                                            ReferentsEqualType.FORMERGING):
                     return None
     return matches
Пример #10
0
 def find_derivates(t : 'Token') -> typing.List['DerivateGroup']:
     """Look up derivate groups for a token, dispatching on the token type.

     * ``NounPhraseToken`` - replaced by the end token of its noun, and only
       word forms whose class intersects ``MorphClass.NOUN`` are considered.
     * ``TextToken`` - the first word form giving a non-empty result wins.
     * ``VerbPhraseToken`` - recurses into ``last_verb``.
     * ``VerbPhraseItemToken`` - tries ``normal_case`` and then
       ``normal_full`` of ``verb_morph``.
     * ``NumberToken`` with value ``"1"`` - looks up "ОДИН".
     * any other ``MetaToken`` - recurses into ``end_token``.

     Args:
         t: token to analyse.

     Returns:
         The groups found, or ``None``.
     """
     noun_only = None
     if isinstance(t, NounPhraseToken):
         t = t.noun.end_token
         noun_only = MorphClass.NOUN

     if isinstance(t, TextToken):
         for form in t.morph.items:
             if not isinstance(form, MorphWordForm):
                 continue
             if noun_only is not None and (noun_only & form.class0_).is_undefined:
                 continue
             found = DerivateService.find_derivates(
                 Utils.ifNotNull(form.normal_full, form.normal_case), True,
                 None)
             if found is not None and len(found) > 0:
                 return found
         return None

     if isinstance(t, VerbPhraseToken):
         return SemanticHelper.find_derivates(t.last_verb)

     if isinstance(t, VerbPhraseItemToken):
         verb_item = Utils.asObjectOrNull(t, VerbPhraseItemToken)
         groups = None
         verb_form = verb_item.verb_morph
         if verb_form is not None:
             groups = DerivateService.find_derivates(verb_form.normal_case,
                                                     True, t.morph.language)
             # NOTE(review): as written, a None result always triggers the
             # retry (even when normal_full is None); only the empty-list
             # case is guarded by the normal_full checks - confirm intent.
             if (groups is None
                     or (len(groups) == 0
                         and verb_form.normal_full is not None
                         and verb_form.normal_case != verb_form.normal_full)):
                 groups = DerivateService.find_derivates(
                     verb_form.normal_full, True, t.morph.language)
         return groups

     if isinstance(t, NumberToken) and t.value == "1":
         return DerivateService.find_derivates("ОДИН", True, MorphLang.RU)
     if isinstance(t, MetaToken):
         return SemanticHelper.find_derivates(t.end_token)
     return None
Пример #11
0
 def __str__(self) -> str:
     """Render as ``"<kind>: <typ>"`` followed by ``"; <part>"`` per part.

     ``"?"`` is the ``Utils.ifNotNull`` fallback for ``self.typ``; each part
     is rendered with ``to_string(True, None, 0)``.
     """
     out = io.StringIO()
     header = "{0}: {1}".format(Utils.enumToString(self.kind),
                                Utils.ifNotNull(self.typ, "?"))
     out.write(header)
     if self.parts is not None:
         for part in self.parts:
             out.write("; {0}".format(part.to_string(True, None, 0)))
     return Utils.toStringStringIO(out)
Пример #12
0
 def get_wordform(word: str, morph_info: 'MorphBaseInfo') -> str:
     """Get the spelling variant of a word form.

     Args:
         word(str): the word
         morph_info(MorphBaseInfo): morphological information

     Returns:
         str: the spelling variant; ``word`` itself is returned unchanged
         when ``morph_info`` is ``None`` or ``word`` is null/empty.
     """
     if (morph_info is None or Utils.isNullOrEmpty(word)):
         return word
     word_class = morph_info.class0_
     if word_class.is_undefined:
         # No class supplied: take the one from the word's own base info.
         base_info = Morphology.get_word_base_info(word, None, False, False)
         if base_info is not None:
             word_class = base_info.class0_
     # The word is upper-cased as soon as it contains a lower-case character.
     if any(ch.islower() for ch in word):
         word = word.upper()
     generated = Morphology.__m_inner.get_wordform(
         word, word_class, morph_info.gender, morph_info.case_,
         morph_info.number, morph_info.language,
         Utils.asObjectOrNull(morph_info, MorphWordForm))
     return Utils.ifNotNull(generated, word)
Пример #13
0
 def create_specific_processor(spec_analyzer_names: str) -> 'Processor':
     """Create a processor with the standard analyzers plus the named
     specific ones.

     Args:
         spec_analyzer_names(str): one or several names of specific
             analyzers; the string is split on comma, semicolon and space.
             With an empty list only the non-specific analyzers are added.

     Returns:
         Processor: the processor instance, or ``None`` when the service
         has not been initialised.
     """
     from pullenti.ner.Processor import Processor
     if not ProcessorService.__m_inited:
         return None
     processor = Processor()
     requested = list(
         Utils.splitString(Utils.ifNotNull(spec_analyzer_names, ""), ",; ",
                           False))
     for prototype in ProcessorService.__m_analizer_instances:
         analyzer = prototype.clone()
         if analyzer is None:
             continue
         # Specific analyzers are included only when explicitly requested.
         if analyzer.is_specific and analyzer.name not in requested:
             continue
         processor.add_analyzer(analyzer)
     return processor
Пример #14
0
 def to_string(self,
               short_variant: bool,
               lang: 'MorphLang',
               lev: int = 0) -> str:
     """Render the participant as ``"<Typ>: <ref> (в лице <delegate>)"``.

     The type (fallback ``"?"``) is passed through
     ``MiscHelper.convert_first_char_upper_and_other_lower``.  When the
     ``ATTR_REF`` referent exists it follows after ``": "`` and the delegate
     is added in parentheses only for the long variant; when only the
     delegate exists it is written as ``": в лице <delegate>"``.

     Args:
         short_variant: request the short form.
         lang: language forwarded to nested ``to_string`` calls.
         lev: nesting level; the delegate is rendered with ``lev + 1``.

     Returns:
         The formatted string.
     """
     out = io.StringIO()
     title = MiscHelper.convert_first_char_upper_and_other_lower(
         Utils.ifNotNull(self.typ, "?"))
     out.write(str(title))
     org0_ = Utils.asObjectOrNull(
         self.get_slot_value(InstrumentParticipantReferent.ATTR_REF),
         Referent)
     del0_ = Utils.asObjectOrNull(
         self.get_slot_value(InstrumentParticipantReferent.ATTR_DELEGATE),
         Referent)
     if org0_ is None:
         if del0_ is not None:
             out.write(": в лице {0}".format(
                 del0_.to_string(short_variant, lang, lev + 1)))
     else:
         out.write(": {0}".format(org0_.to_string(short_variant, lang, 0)))
         if del0_ is not None and not short_variant:
             out.write(" (в лице {0})".format(
                 del0_.to_string(True, lang, lev + 1)))
     return Utils.toStringStringIO(out)
Пример #15
0
 def __parse_subsent(npt : 'NounPhraseToken', t1 : 'Token', lev : int, prev : typing.List['SentItem']) -> typing.List['SentItem']:
     """Build SUBSENT items for the sub-sentences that follow ``npt``.

     Parsing is attempted only when, scanning ``prev`` backwards, a CONJ or
     DELIM item is met before any VERB item.  Each variant returned by
     ``Sentence.parse_variants`` gets ``npt`` inserted as its first item;
     variants ending at an already seen end position are dropped.

     Args:
         npt: noun phrase that starts the sub-sentence.
         t1: token bound forwarded to ``Sentence.parse_variants``.
         lev: current nesting level (``lev + 1`` is passed on).
         prev: items already parsed before ``npt``; may be ``None``.

     Returns:
         List of SUBSENT items (possibly empty), or ``None`` when the
         precondition on ``prev`` does not hold.
     """
     if prev is None:
         return None
     for earlier in reversed(prev):
         if earlier.typ == SentItemType.CONJ or earlier.typ == SentItemType.DELIM:
             break
         if earlier.typ == SentItemType.VERB:
             return None
     else:
         # Neither a conjunction nor a delimiter precedes the phrase.
         return None

     variants = Utils.ifNotNull(
         Sentence.parse_variants(npt.end_token.next0_, t1, lev + 1, 20,
                                 SentItemType.SUBSENT), list())
     seen_ends = list()
     result = list()
     for sent in variants:
         sent.items.insert(0, SentItem(npt))
         sent.calc_coef(True)
         sent.trunc_oborot(False)
         end_char = sent.items[len(sent.items) - 1].end_token.end_char
         if end_char in seen_ends:
             continue
         seen_ends.append(end_char)
         sent.calc_coef(False)
         sub_item = SentItem(npt)
         sub_item.typ = SentItemType.SUBSENT
         sub_item.sub_typ = SentItemSubtype.WICH
         sub_item.sub_sent = sent
         sub_item.result = sent.items[0].result
         sub_item.end_token = sent.items[len(sent.items) - 1].end_token
         result.append(sub_item)
     return result
Пример #16
0
 def deserializeDerivateGroup(str0_ : 'ByteArrayWrapper', dg : 'DerivateGroup') -> None:
     """Fill ``dg`` from the serialized data read through ``str0_``.

     Read order: a flag word, the prefix string, a counted list of words,
     then a counted list of (prefix, case) pairs stored in ``dg.nexts``.

     Args:
         str0_: reader providing ``deserializeShort`` / ``deserializeString``.
         dg: group object that receives the values.
     """
     flags = str0_.deserializeShort()
     if flags & 1:
         dg.is_dummy = True
     if flags & 2:
         dg.not_generate = True
     # Bit 8 is tested after bit 4, so it wins when both are set.
     for mask, transitive in ((4, 0), (8, 1)):
         if flags & mask:
             dg.m_transitive = transitive
     dg.prefix = str0_.deserializeString()

     for _ in range(str0_.deserializeShort()):
         word = DerivateWord(dg)
         word.spelling = str0_.deserializeString()
         word.class0_ = MorphClass()
         word.class0_.value = str0_.deserializeShort()
         word.lang = MorphLang._new5(str0_.deserializeShort())
         word.attrs.value = str0_.deserializeShort()
         dg.words.append(word)

     for _ in range(str0_.deserializeShort()):
         key = Utils.ifNotNull(str0_.deserializeString(), "")
         case = MorphCase()
         case.value = str0_.deserializeShort()
         if dg.nexts is None:
             dg.nexts = dict()
         dg.nexts[key] = case
Пример #17
0
 def __getName(self, cyr : bool) -> str:
     """Pick one display name from the ``ATTR_NAME`` slots.

     Two passes are made.  In the first pass only names whose first
     character being Cyrillic matches ``cyr`` are considered; the second
     pass (run only if the first found nothing) considers every name.
     Within a pass a shorter name replaces the current one unless it is very
     short (< 4 while the current one is < 10) or the current one ends in
     'В'; a current name shorter than 4 is replaced by one of length 4..9.
     "МОЛДОВА" and "БЕЛАРУСЬ" are mapped to "МОЛДАВИЯ" and "БЕЛОРУССИЯ".

     Args:
         cyr: script selector for the first pass (see above).

     Returns:
         The chosen name, with ``"?"`` as the ``Utils.ifNotNull`` fallback.
     """
     chosen = None
     for script_filter in (True, False):
         for slot in self.slots:
             if slot.type_name != GeoReferent.ATTR_NAME:
                 continue
             candidate = str(slot.value)
             if Utils.isNullOrEmpty(candidate):
                 continue
             if script_filter and bool(
                     LanguageHelper.isCyrillicChar(candidate[0])) != bool(cyr):
                 continue
             if chosen is None:
                 chosen = candidate
             elif len(chosen) > len(candidate):
                 keep_current = (
                     (len(candidate) < 4 and len(chosen) < 10)
                     or chosen[-1] == 'В')
                 if not keep_current:
                     chosen = candidate
             elif len(chosen) < 4 and 4 <= len(candidate) < 10:
                 chosen = candidate
         if chosen is not None:
             break
     renamed = {"МОЛДОВА": "МОЛДАВИЯ", "БЕЛАРУСЬ": "БЕЛОРУССИЯ"}
     chosen = renamed.get(chosen, chosen)
     return Utils.ifNotNull(chosen, "?")
Пример #18
0
 def try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
     """Parse a weapon item starting at ``t``.

     When the direct parse succeeds and yields a NAME item, a following
     bracket sequence starting with '(' whose text passes
     ``MiscHelper.can_be_equal_cyr_and_latss`` against the value is absorbed
     as the alternative value.  When the direct parse fails, the noun of a
     noun phrase starting at ``t`` is tried instead; a NOUN result is
     accepted only if the phrase's singular normal text ends with the item
     value, and that text is then merged into ``alt_value``.

     Args:
         t: first token.
         prev: previously parsed item, forwarded to the inner parser.
         after_conj: forwarded to the inner parser.
         attach_high: forwarded to the inner parser.

     Returns:
         The parsed item or ``None``.
     """
     res = WeaponItemToken.__try_parse(t, prev, after_conj, attach_high)
     if res is not None:
         if res.typ == WeaponItemToken.Typs.NAME:
             br = BracketHelper.try_parse(res.end_token.next0_,
                                          BracketParseAttr.NO, 100)
             if br is not None and br.is_char('('):
                 alt = MiscHelper.get_text_value_of_meta_token(
                     br, GetTextAttr.NO)
                 if MiscHelper.can_be_equal_cyr_and_latss(res.value, alt):
                     res.alt_value = alt
                     res.end_token = br.end_token
         return res

     # Direct parse failed: retry from the noun of a leading noun phrase.
     npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
     if npt is None or not (npt.noun.begin_char > npt.begin_char):
         return None
     res = WeaponItemToken.__try_parse(npt.noun.begin_token, prev,
                                       after_conj, attach_high)
     if res is None or not (res.typ == WeaponItemToken.Typs.NOUN):
         return None
     phrase_text = npt.get_normal_case_text(None, MorphNumber.SINGULAR,
                                            MorphGender.UNDEFINED, False)
     if phrase_text == "РУЧНОЙ ГРАНАТ":
         phrase_text = "РУЧНАЯ ГРАНАТА"
     if not Utils.ifNotNull(phrase_text, "").endswith(res.value):
         return None
     if res.alt_value is None:
         res.alt_value = phrase_text
     else:
         # Keep only the part of the phrase that precedes the item value.
         lead = phrase_text[:len(phrase_text) - len(res.value)].strip()
         res.alt_value = "{0} {1}".format(lead, res.alt_value)
     res.begin_token = t
     return res
Пример #19
0
 def __ToString(self, short_variant : bool, lang : 'MorphLang', out_cladr : bool, lev : int) -> str:
     """Build the textual representation of the geo referent.

     A union that is not a state is rendered as its type followed by
     ``"; <member>"`` for every referent in an ``ATTR_REF`` slot.  Otherwise
     the name chosen by ``__getName`` is used; for the long variant it may
     be prefixed with the type, suffixed with FIAS/BTI identifiers (when
     ``out_cladr`` is set) and followed by the higher-level referent.

     Args:
         short_variant: return just the capitalised name.
         lang: language; ``lang.is_en`` is what gets passed to ``__getName``.
         out_cladr: append "(ФИАС: ...)" / "(БТИ ...)" when available.
         lev: recursion depth; the higher referent is appended only while
             ``lev < 10``.

     Returns:
         The formatted string.
     """
     if self.is_union and not self.is_state:
         out = io.StringIO()
         out.write(str(self.getStringValue(GeoReferent.ATTR_TYPE)))
         for slot in self.slots:
             if slot.type_name == GeoReferent.ATTR_REF and isinstance(slot.value, Referent):
                 out.write("; {0}".format(slot.value.toString(True, lang, 0)))
         return Utils.toStringStringIO(out)

     name = MiscHelper.convertFirstCharUpperAndOtherLower(
         self.__getName(lang is not None and lang.is_en))
     if short_variant:
         return name

     if not self.is_state and not (self.is_city and self.is_region):
         typ = self.getStringValue(GeoReferent.ATTR_TYPE)
         if typ is not None:
             if not self.is_city:
                 # For non-cities only the last word of the type is shown.
                 space_pos = typ.rfind(' ')
                 if space_pos > 0:
                     typ = typ[space_pos + 1:]
             name = "{0} {1}".format(typ, name)

     if out_cladr:
         kladr = self.getSlotValue(GeoReferent.ATTR_FIAS)
         if isinstance(kladr, Referent):
             name = "{0} (ФИАС: {1})".format(
                 name, Utils.ifNotNull(kladr.getStringValue("GUID"), "?"))
         bti = self.getStringValue(GeoReferent.ATTR_BTI)
         if bti is not None:
             name = "{0} (БТИ {1})".format(name, bti)

     if self.higher is not None and lev < 10:
         region_in_city = self.higher.is_city and self.is_region
         untyped_city = (
             self.findSlot(GeoReferent.ATTR_TYPE, "город", True) is None
             and self.findSlot(GeoReferent.ATTR_TYPE, "місто", True) is None
             and self.is_city)
         if region_in_city or untyped_city:
             return "{0}; {1}".format(
                 name, self.higher.__ToString(False, lang, False, lev + 1))
     return name
Пример #20
0
 def convert_adverb_to_adjective(adverb: str, bi: 'MorphBaseInfo') -> str:
     """Convert an adverb to an adjective (Russian only for now).

     The final 'О'/'Е' is replaced by "ИЙ" and by "ЫЙ"; the "ЫЙ" variant is
     chosen only when it is recognised as an adjective and the "ИЙ" variant
     is not.  A missing base info (``None``) counts as "not an adjective",
     so an unknown variant no longer raises ``AttributeError``.

     Args:
         adverb(str): the adverb
         bi(MorphBaseInfo): gender, number and case for the result; when
             ``None`` the chosen base variant is returned as is.

     Returns:
         str: the adjective; ``None`` when ``adverb`` is ``None`` or shorter
         than 4 characters; ``adverb`` unchanged when it does not end in
         'О' or 'Е'.
     """
     if adverb is None or len(adverb) < 4:
         return None
     if adverb[-1] not in ('О', 'Е'):
         return adverb
     stem = adverb[:-1]
     var1 = stem + "ИЙ"
     var2 = stem + "ЫЙ"
     bi1 = Morphology.get_word_base_info(var1, None, False, False)
     bi2 = Morphology.get_word_base_info(var2, None, False, False)
     # get_word_base_info may return None, so guard before dereferencing.
     is_adj1 = bi1 is not None and bi1.class0_.is_adjective
     is_adj2 = bi2 is not None and bi2.class0_.is_adjective
     var = var2 if (not is_adj1 and is_adj2) else var1
     if bi is None:
         return var
     return Utils.ifNotNull(
         Morphology.__m_inner.get_wordform(var, MorphClass.ADJECTIVE,
                                           bi.gender, bi.case_, bi.number,
                                           MorphLang.UNKNOWN, None), var)
Пример #21
0
 def toString(self,
              short_variant: bool,
              lang: 'MorphLang' = None,
              lev: int = 0) -> str:
     """Render the unit name, optionally followed by its power.

     The name is taken from the first ``ATTR_NAME`` slot (short variant) or
     ``ATTR_FULLNAME`` slot (long variant).  When ``lang`` is given, a first
     pass accepts only values whose Cyrillic-ness equals ``lang.is_ru``; a
     second pass accepts any value.  If nothing is found, the ``ATTR_NAME``
     string value is used, and ``"?"`` stands in for a missing name in every
     output form (previously the text "None" leaked into the result when a
     power was present).

     Args:
         short_variant: choose the short name instead of the full name.
         lang: preferred language, may be ``None``.
         lev: nesting level; the power is omitted when ``lev > 0``.

     Returns:
         ``name`` alone, ``name + pow`` or ``name<pow>`` (for powers that
         start with '-'), prefixed with ``"(?)"`` for an unknown unit in the
         long variant.
     """
     wanted_attr = (UnitReferent.ATTR_NAME
                    if short_variant else UnitReferent.ATTR_FULLNAME)
     nam = None
     for language_filter in (True, False):
         for s in self.slots:
             if s.type_name != wanted_attr:
                 continue
             val = Utils.asObjectOrNull(s.value, str)
             if (language_filter and lang is not None
                     and lang.is_ru != LanguageHelper.isCyrillic(val)):
                 continue
             nam = val
             break
         if nam is not None:
             break
     if nam is None:
         nam = self.getStringValue(UnitReferent.ATTR_NAME)
     # Apply the placeholder once so both return paths agree.
     nam = Utils.ifNotNull(nam, "?")
     pow0_ = self.getStringValue(UnitReferent.ATTR_POW)
     if Utils.isNullOrEmpty(pow0_) or lev > 0:
         return nam
     if pow0_[0] != '-':
         res = "{0}{1}".format(nam, pow0_)
     else:
         res = "{0}<{1}>".format(nam, pow0_)
     if not short_variant and self.is_unknown:
         res = "(?)" + res
     return res
Пример #22
0
 def _set_morph(obj : 'SemObject', wf : 'MorphWordForm') -> None:
     """Copy the morphological fields of ``wf`` into ``obj.morph``.

     ``normal_full`` uses ``wf.normal_case`` as its ``Utils.ifNotNull``
     fallback.  Nothing happens when ``wf`` is ``None``.

     Args:
         obj: object whose ``morph`` is updated.
         wf: source word form, may be ``None``.
     """
     if wf is None:
         return
     target = obj.morph
     target.normal_case = wf.normal_case
     target.normal_full = Utils.ifNotNull(wf.normal_full, wf.normal_case)
     target.number = wf.number
     target.gender = wf.gender
     target.misc = wf.misc
Пример #23
0
 def get_keyword(mt : 'MetaToken') -> str:
     """Return the key word of a verb phrase or noun phrase token.

     For a ``VerbPhraseToken`` this is ``normal_full`` of the last verb's
     morphology with ``normal_case`` as the fallback; for a
     ``NounPhraseToken`` it is the singular NOUN normal-case text of the
     noun's end token.

     Args:
         mt: token to inspect.

     Returns:
         The key word, or ``None`` for any other token type.
     """
     verb_phrase = Utils.asObjectOrNull(mt, VerbPhraseToken)
     if verb_phrase is not None:
         verb_form = verb_phrase.last_verb.verb_morph
         return Utils.ifNotNull(verb_form.normal_full, verb_form.normal_case)
     noun_phrase = Utils.asObjectOrNull(mt, NounPhraseToken)
     if noun_phrase is None:
         return None
     return noun_phrase.noun.end_token.get_normal_case_text(
         MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
Пример #24
0
 def __str__(self) -> str:
     """Render the unit with its power, doubt mark and keyword.

     The base text is ``unknown_name`` with ``str(ext_onto)`` (or
     ``str(unit)`` when ``ext_onto`` is ``None``) as the fallback.
     ``<pow>`` is appended when the power differs from 1, ``?`` when the
     item is doubtful, and `` (<-keyword)`` when a keyword is present.
     """
     if self.ext_onto is None:
         fallback = str(self.unit)
     else:
         fallback = str(self.ext_onto)
     text = Utils.ifNotNull(self.unknown_name, fallback)
     if self.pow0_ != 1:
         text = "{0}<{1}>".format(text, self.pow0_)
     if self.is_doubt:
         text += "?"
     if self.keyword is not None:
         keyword_text = self.keyword.getNormalCaseText(
             None, False, MorphGender.UNDEFINED, False)
         text = "{0} (<-{1})".format(text, keyword_text)
     return text
Пример #25
0
 def __init__(self,
              source: 'MorphToken',
              kit_: 'AnalysisKit',
              bchar: int = -1,
              echar: int = -1) -> None:
     """Create the token from a morphological token.

     Args:
         source: morphological token to copy from; when ``None`` only the
             default field values are set.
         kit_: analysis kit passed to the base constructor.
         bchar: begin position; a negative value means "take it from
             ``source``" (``0`` when ``source`` is ``None``).
         echar: end position, with the same convention as ``bchar``.
     """
     if bchar >= 0:
         begin = bchar
     elif source is None:
         begin = 0
     else:
         begin = source.begin_char
     if echar >= 0:
         end = echar
     elif source is None:
         end = 0
     else:
         end = source.end_char
     super().__init__(kit_, begin, end)

     self.term = None
     self.lemma = None
     self.term0 = None
     self.invariant_prefix_length_of_morph_vars = 0
     self.max_length_of_morph_vars = 0
     if source is None:
         return

     self.chars = source.char_info
     self.term = source.term
     self.lemma = Utils.ifNotNull(source.get_lemma(), self.term)
     self.max_length_of_morph_vars = len(self.term)
     self.morph = MorphCollection()
     if source.word_forms is not None:
         for form in source.word_forms:
             self.morph.add_item(form)
             # Track the longest of the term and all normal forms.
             for variant in (form.normal_case, form.normal_full):
                 if (variant is not None
                         and self.max_length_of_morph_vars < len(variant)):
                     self.max_length_of_morph_vars = len(variant)

     # Length of the leading part of the term that every normal form of
     # every morph item shares character by character.
     for pos, ch in enumerate(self.term):
         differs = False
         for item_idx in range(self.morph.items_count):
             form = Utils.asObjectOrNull(
                 self.morph.get_indexer_item(item_idx), MorphWordForm)
             case_form = form.normal_case
             if case_form is not None and (pos >= len(case_form)
                                           or case_form[pos] != ch):
                 differs = True
                 break
             full_form = form.normal_full
             if full_form is not None and (pos >= len(full_form)
                                           or full_form[pos] != ch):
                 differs = True
                 break
         if differs:
             break
         self.invariant_prefix_length_of_morph_vars = pos + 1

     if (self.morph.language.is_undefined
             and not source.language.is_undefined):
         self.morph.language = source.language
Пример #26
0
 def year(self) -> int:
     """Return the ``ATTR_YEAR`` value as an integer.

     The string value (``""`` is the fallback) is handed to
     ``Utils.tryParseInt`` together with an out-parameter wrapper.

     Returns:
         The value stored in the wrapper when ``tryParseInt`` reports
         success, otherwise ``0``.
     """
     holder = RefOutArgWrapper(0)
     raw_year = Utils.ifNotNull(
         self.getStringValue(BookLinkReferent.ATTR_YEAR), "")
     parsed_ok = Utils.tryParseInt(raw_year, holder)
     return holder.value if parsed_ok else 0
Пример #27
0
 def refresh_variants(self) -> None:
     """Rebuild ``variants``, ``variants_key`` and ``variants_list``.

     All variants currently held in ``variants_list`` are collected, the
     three containers are cleared, and the variants are regrouped in
     ``variants`` under their ``tail`` (``""`` is the fallback key).
     ``variants_key`` and ``variants_list`` are then refilled in the
     iteration order of ``variants``.
     """
     all_variants = [v for group in self.variants_list for v in group]
     self.variants.clear()
     self.variants_key.clear()
     self.variants_list.clear()
     for variant in all_variants:
         key = Utils.ifNotNull(variant.tail, "")
         holder = RefOutArgWrapper(None)
         already_present = Utils.tryGetValue(self.variants, key, holder)
         bucket = holder.value
         if not already_present:
             bucket = list()
             self.variants[key] = bucket
         bucket.append(variant)
     for key, bucket in self.variants.items():
         self.variants_key.append(key)
         self.variants_list.append(bucket)
Пример #28
0
 def normal(self) -> str:
     """Normalized value.

     Without ``verb_morph`` the stored value is returned.  For a
     non-adjective form with a defined case the stored value is preferred
     when it is set; for an adjective that is not also a verb the full
     normal form is returned (``normal_case`` is the fallback); in all
     remaining cases ``normal_case`` is returned.
     """
     wf = self.verb_morph
     if wf is None:
         return self.__m_normal
     if wf.class0_.is_adjective:
         if not wf.class0_.is_verb:
             return Utils.ifNotNull(wf.normal_full, wf.normal_case)
     elif not wf.case_.is_undefined and self.__m_normal is not None:
         return self.__m_normal
     return wf.normal_case
Пример #29
0
 def toString(self, short_variant : bool, lang : 'MorphLang'=None, lev : int=0) -> str:
     """Render the value, prefixed with its scheme when there is one.

     The separator is a space for ISBN/ББК/УДК, ``"://"`` for
     http/ftp/https and ``":"`` for every other scheme.  ``"?"`` is the
     ``Utils.ifNotNull`` fallback for the value when a scheme is present.

     Args:
         short_variant: not used by this implementation.
         lang: not used by this implementation.
         lev: not used by this implementation.

     Returns:
         ``"<scheme><separator><value>"``, or ``self.value`` unchanged when
         there is no scheme.
     """
     scheme = self.scheme
     if scheme is None:
         return self.value
     if scheme in ("ISBN", "ББК", "УДК"):
         separator = " "
     elif scheme in ("http", "ftp", "https"):
         separator = "://"
     else:
         separator = ":"
     return "{0}{1}{2}".format(self.scheme, separator,
                               Utils.ifNotNull(self.value, "?"))
 def tryParse(t : 'Token') -> 'DefinitionWithNumericToken':
     """Extract a definition starting at the given token.

     ``t`` must be able to start a sentence.  Within that sentence the first
     ``NumberToken`` is located that is not ``t`` itself, has at most two
     whitespaces after it, is not an adjective and is followed by a noun
     phrase.  The result spans from ``t`` to the last token before the next
     sentence start and records the number, the noun and the surrounding
     text.

     Args:
         t(Token): the token

     Returns:
         The parsed token, or ``None`` when ``t`` cannot start a sentence,
         no suitable number is found before the next sentence start, or the
         number has no ``int_value``.
     """
     if not MiscHelper.canBeStartOfSentence(t):
         return None

     num = None
     noun_ = None
     cur = t
     while cur is not None:
         if cur != t and MiscHelper.canBeStartOfSentence(cur):
             return None
         suitable = (isinstance(cur, NumberToken)
                     and not (cur.whitespaces_after_count > 2 or cur == t)
                     and not cur.morph.class0_.is_adjective)
         if suitable:
             phrase = NounPhraseHelper.tryParse(cur.next0_,
                                                NounPhraseParseAttr.NO, 0)
             if phrase is not None:
                 num = Utils.asObjectOrNull(cur, NumberToken)
                 noun_ = phrase
                 break
         cur = cur.next0_
     if num is None or num.int_value is None:
         return None

     text_attr = Utils.valToEnum(
         (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr)
     res = DefinitionWithNumericToken(t, noun_.end_token)
     res.number = num.int_value
     res.number_begin_char = num.begin_char
     res.number_end_char = num.end_char
     res.noun = noun_.getNormalCaseText(None, True, MorphGender.UNDEFINED,
                                        False)
     res.nouns_genetive = Utils.ifNotNull(
         noun_.getMorphVariant(MorphCase.GENITIVE, True), res.noun)
     res.text = MiscHelper.getTextValue(t, num.previous, text_attr)
     if num.is_whitespace_before:
         res.text += " "
     res.number_substring = MiscHelper.getTextValue(num, noun_.end_token,
                                                    text_attr)
     res.text += res.number_substring

     # Extend the result up to the token before the next sentence start.
     tail = noun_.end_token
     while tail is not None and not MiscHelper.canBeStartOfSentence(tail):
         res.end_token = tail
         tail = tail.next0_
     if res.end_token != noun_.end_token:
         if noun_.is_whitespace_after:
             res.text += " "
         res.text += MiscHelper.getTextValue(noun_.end_token.next0_,
                                             res.end_token, text_attr)
     return res