def checkValue(self, dict0_: typing.List[tuple]) -> object:
    """Try to bind this token to an entry of a dictionary.

    The token's ``term`` is probed first. If that misses and ``self.morph``
    is set, every morphology item that is a ``MorphWordForm`` is probed by
    its ``normal_case`` and then by its ``normal_full`` (the latter only
    when it differs from ``normal_case``).

    Args:
        dict0_: container handed to ``Utils.tryGetValue``; ``None`` gives
            ``None``.

    Returns:
        The value of the first successful lookup, otherwise ``None``.
    """
    if dict0_ is None:
        return None

    by_term = RefOutArgWrapper(None)
    if Utils.tryGetValue(dict0_, self.term, by_term):
        return by_term.value

    if self.morph is None:
        return None

    for item in self.morph.items:
        word_form = Utils.asObjectOrNull(item, MorphWordForm)
        if word_form is None:
            continue
        if word_form.normal_case is not None:
            by_case = RefOutArgWrapper(None)
            if Utils.tryGetValue(dict0_, word_form.normal_case, by_case):
                return by_case.value
        if (word_form.normal_full is not None
                and word_form.normal_case != word_form.normal_full):
            by_full = RefOutArgWrapper(None)
            if Utils.tryGetValue(dict0_, word_form.normal_full, by_full):
                return by_full.value
    return None
def __find_in_tree(self, key : str, lang : 'MorphLang') -> typing.List['Termin']:
    """Walk the character tree along ``key`` and return the termins found.

    Args:
        key: text to look up; ``None`` gives ``None``.
        lang: language used to select the root node via ``__get_root``.

    Returns:
        ``nod.termins`` of the node reached after consuming all of ``key``;
        or, when the walk stops at a space, the list of termins of the
        current node whose word variants match the remaining words
        (``None`` if none match); otherwise ``None``.
    """
    if (key is None): 
        return None
    # The second argument is true only for an unspecified language and a Latin key.
    nod = self.__get_root(lang, ((lang is None or lang.is_undefined)) and LanguageHelper.is_latin(key))
    i = 0
    while i < len(key): 
        ch = ord(key[i])
        nn = None
        if (nod.children is not None): 
            wrapnn582 = RefOutArgWrapper(None)
            Utils.tryGetValue(nod.children, ch, wrapnn582)
            nn = wrapnn582.value
        if (nn is None): 
            # No child for this character; 32 is the code of a space.
            if (ch == (32)): 
                if (nod.termins is not None): 
                    pp = Utils.splitString(key, ' ', False)
                    res = None
                    for t in nod.termins: 
                        if (len(t.terms) == len(pp)): 
                            k = 0
                            # Comparison starts at index 1: pp[0] is not checked here.
                            k = 1
                            while k < len(pp): 
                                if (not pp[k] in t.terms[k].variants): 
                                    break
                                k += 1
                            # Loop ran to completion => every compared word matched.
                            if (k >= len(pp)): 
                                if (res is None): 
                                    res = list()
                                res.append(t)
                    return res
            return None
        nod = nn
        i += 1
    return nod.termins
def __get_bigrams_info(self, si1: 'StatisticWordInfo', si2: 'StatisticWordInfo') -> 'StatisticBigrammInfo':
    """Build bigram statistics for the ordered word pair (si1, si2).

    Args:
        si1: statistics of the first word; ``normal`` and ``total_count``
            are read.
        si2: statistics of the second word; same attributes are read.

    Returns:
        A ``StatisticBigrammInfo`` created from both total counts, with
        ``pair_count`` taken from the forward table when the pair is
        present, and the ``first_has_other_second`` /
        ``second_has_other_first`` flags set when the respective table
        holds a partner other than the one given.
    """
    res = StatisticBigrammInfo._new552(si1.total_count, si2.total_count)

    forward_ref = RefOutArgWrapper(None)
    Utils.tryGetValue(self.__m_bigramms, si1.normal, forward_ref)
    di12 = forward_ref.value

    reverse_ref = RefOutArgWrapper(None)
    Utils.tryGetValue(self.__m_bigramms_rev, si2.normal, reverse_ref)
    di21 = reverse_ref.value

    if di12 is not None:
        if si2.normal not in di12:
            res.first_has_other_second = True
        else:
            res.pair_count = di12[si2.normal]
            if len(di12) > 1:
                res.first_has_other_second = True

    if di21 is not None:
        # The original chain tested "si1.normal not in di21" twice; the
        # second test could never fire, so the two live conditions are merged.
        if si1.normal not in di21 or len(di21) > 1:
            res.second_has_other_first = True
    return res
def __getBigramsInfo(self, si1: 'WordInfo', si2: 'WordInfo') -> 'BigrammInfo':
    """Build bigram statistics for the ordered word pair (si1, si2).

    Args:
        si1: statistics of the first word; ``normal`` and ``total_count``
            are read.
        si2: statistics of the second word; same attributes are read.

    Returns:
        A ``StatisticCollection.BigrammInfo`` created from both total
        counts, with ``pair_count`` taken from the forward table when the
        pair is present, and the ``first_has_other_second`` /
        ``second_has_other_first`` flags set when the respective table
        holds a partner other than the one given.
    """
    res = StatisticCollection.BigrammInfo._new597(si1.total_count, si2.total_count)

    forward_ref = RefOutArgWrapper(None)
    Utils.tryGetValue(self.__m_bigramms, si1.normal, forward_ref)
    di12 = forward_ref.value

    reverse_ref = RefOutArgWrapper(None)
    Utils.tryGetValue(self.__m_bigramms_rev, si2.normal, reverse_ref)
    di21 = reverse_ref.value

    if di12 is not None:
        if si2.normal not in di12:
            res.first_has_other_second = True
        else:
            res.pair_count = di12[si2.normal]
            if len(di12) > 1:
                res.first_has_other_second = True

    if di21 is not None:
        # The original chain tested "si1.normal not in di21" twice; the
        # second test could never fire, so the two live conditions are merged.
        if si1.normal not in di21 or len(di21) > 1:
            res.second_has_other_first = True
    return res
def check_value(self, dict0_: typing.List[tuple]) -> object:
    """Try to bind this token to an entry of a dictionary.

    Lookup order: ``self.term``; then, for each morphology item that is a
    ``MorphWordForm``, its ``normal_case`` followed by its ``normal_full``
    (only when that differs from ``normal_case``).

    Args:
        dict0_: container handed to ``Utils.tryGetValue``; ``None`` gives
            ``None``.

    Returns:
        The value of the first successful lookup, otherwise ``None``.
    """
    if dict0_ is None:
        return None

    slot = RefOutArgWrapper(None)
    if Utils.tryGetValue(dict0_, self.term, slot):
        return slot.value

    morph = self.morph
    if morph is not None:
        for entry in morph.items:
            form = Utils.asObjectOrNull(entry, MorphWordForm)
            if form is None:
                continue
            if form.normal_case is not None:
                slot = RefOutArgWrapper(None)
                if Utils.tryGetValue(dict0_, form.normal_case, slot):
                    return slot.value
            if form.normal_full is not None and form.normal_case != form.normal_full:
                slot = RefOutArgWrapper(None)
                if Utils.tryGetValue(dict0_, form.normal_full, slot):
                    return slot.value
    return None
def __add_bigramm(self, b1: 'StatisticWordInfo', b2: 'StatisticWordInfo') -> None:
    """Count one occurrence of the bigram (b1, b2) in both lookup tables.

    ``self.__m_bigramms`` is keyed by the first word and
    ``self.__m_bigramms_rev`` by the second; each maps to a dictionary of
    partner word -> occurrence count, created on first use.
    """
    forward_ref = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.__m_bigramms, b1.normal, forward_ref):
        followers = forward_ref.value
    else:
        followers = dict()
        self.__m_bigramms[b1.normal] = followers
    followers[b2.normal] = followers.get(b2.normal, 0) + 1

    reverse_ref = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.__m_bigramms_rev, b2.normal, reverse_ref):
        predecessors = reverse_ref.value
    else:
        predecessors = dict()
        self.__m_bigramms_rev[b2.normal] = predecessors
    predecessors[b1.normal] = predecessors.get(b1.normal, 0) + 1
def initialize() -> None:
    """Load the short-name table from the "ShortNames.txt" resource (once).

    Each record starts on a new line: the first token selects the gender
    ("F" -> ``MorphGender.FEMINIE``, anything else -> ``MASCULINE``), the
    next token is the full name, and every following token up to the next
    line start is a short form of it. Every short form is registered in
    ``ShortNameHelper.M_SHORTS_NAMES`` as a list of ``ShortnameVar`` items.
    """
    # Guard: the table is built only on the first call.
    if (ShortNameHelper.M_INITED): 
        return
    ShortNameHelper.M_INITED = True
    obj = PullentiNerPersonInternalResourceHelper.get_string("ShortNames.txt")
    if (obj is not None): 
        kit = AnalysisKit(SourceOfAnalysis(obj))
        t = kit.first_token
        while t is not None: 
            if (t.is_newline_before): 
                g = (MorphGender.FEMINIE if t.is_value("F", None) else MorphGender.MASCULINE)
                t = t.next0_
                # NOTE(review): t is not checked for None before .term is read.
                nam = t.term
                shos = list()
                t = t.next0_
                # Collect short forms until the next record (line start) or end of text.
                while t is not None: 
                    if (t.is_newline_before): 
                        break
                    else: 
                        shos.append(t.term)
                    t = t.next0_
                for s in shos: 
                    li = None
                    wrapli2599 = RefOutArgWrapper(None)
                    inoutres2600 = Utils.tryGetValue(ShortNameHelper.M_SHORTS_NAMES, s, wrapli2599)
                    li = wrapli2599.value
                    if (not inoutres2600): 
                        li = list()
                        ShortNameHelper.M_SHORTS_NAMES[s] = li
                    li.append(ShortNameHelper.ShortnameVar._new2598(nam, g))
                if (t is None): 
                    break
                # Step back so that the advance below lands on the line-start token again.
                t = t.previous
            t = t.next0_
def attachReferent(self, r : 'Referent') -> typing.List['ExtOntologyItem']:
    """Bind a referent to items of this ontology.

    Args:
        r(Referent): referent to bind; its ``type_name`` selects the
            per-type index in ``self.__m_hash``.

    Returns:
        typing.List[ExtOntologyItem]: ``None`` or the list of matching items.
    """
    if self.__m_hash is None:
        self.__initHash()

    index_ref = RefOutArgWrapper(None)
    if not Utils.tryGetValue(self.__m_hash, r.type_name, index_ref):
        return None

    matches = index_ref.value.tryAttachByReferent(r, None, False)
    if matches is None or len(matches) == 0:
        return None

    collected = None
    for match in matches:
        if match.ontology_items is None:
            continue
        # The result list is created lazily so that "no items" stays None.
        if collected is None:
            collected = list()
        collected.extend(match.ontology_items)
    return collected
def getAnalyzerData(self, analyzer : 'Analyzer') -> 'AnalyzerData':
    """Return (creating on first use) the local data of an analyzer.

    Args:
        analyzer(Analyzer): analyzer whose ``name`` keys ``self.__m_datas``.

    Returns:
        The data object with its ``kit`` set to this object, or ``None``
        when the analyzer or its name is ``None`` or
        ``createAnalyzerData()`` returns ``None``.
    """
    if analyzer is None or analyzer.name is None:
        return None

    cached = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.__m_datas, analyzer.name, cached):
        data = cached.value
        data.kit = self
        return data

    data = analyzer.createAnalyzerData()
    if data is None:
        return None

    if analyzer._persist_referents_regim:
        # In persist mode the analyzer keeps one data object across calls:
        # remember the fresh one, or reuse the one already stored.
        if analyzer._persist_analizer_data is None:
            analyzer._persist_analizer_data = data
        else:
            data = analyzer._persist_analizer_data

    self.__m_datas[analyzer.name] = data
    data.kit = self
    return data
def attach_referent(self, r: 'Referent') -> typing.List['ExtOntologyItem']:
    """Bind a referent to an existing ontology record.

    Args:
        r(Referent): external referent; its ``type_name`` selects the
            per-type index in ``self.__m_hash``.

    Returns:
        typing.List[ExtOntologyItem]: ``None`` or the list of matching items.
    """
    if self.__m_hash is None:
        self.__init_hash()

    per_type = RefOutArgWrapper(None)
    if not Utils.tryGetValue(self.__m_hash, r.type_name, per_type):
        return None

    hits = per_type.value.try_attach_by_referent(r, None, False)
    if hits is None or len(hits) == 0:
        return None

    items = None
    for hit in hits:
        if hit.ontology_items is None:
            continue
        # Created lazily: hits without ontology items leave the result None.
        if items is None:
            items = list()
        items.extend(hit.ontology_items)
    return items
def getCountryPrefix(full_number: str) -> str:
    """Extract the country phone prefix from a "full" number.

    The digits are followed through the tree rooted at
    ``PhoneHelper.M_PHONE_ROOT``; the longest prefix ending on a node with
    a non-empty ``countries`` list wins.

    Args:
        full_number(str): number to analyse; ``None`` gives ``None``.

    Returns:
        The prefix of ``full_number`` or ``None`` when nothing matched.
    """
    if full_number is None:
        return None

    node = PhoneHelper.M_PHONE_ROOT
    last_hit = -1
    for pos, digit in enumerate(full_number):
        child_ref = RefOutArgWrapper(None)
        if not Utils.tryGetValue(node.children, digit, child_ref):
            break
        node = child_ref.value
        if node.countries is not None and len(node.countries) > 0:
            last_hit = pos

    if last_hit < 0:
        return None
    return full_number[:last_hit + 1]
def find_by_spel(spel: str) -> 'NextModelItem':
    """Return the item registered under ``spel`` or ``None`` if there is none."""
    hit = RefOutArgWrapper(None)
    if Utils.tryGetValue(NextModelHelper.__m_hash_by_spel, spel, hit):
        return hit.value
    return None
def __manage_var(self, token : 'Token', pars : 'TerminParseAttr', v : str, nod : 'CharNode', i0 : int, res : typing.List['TerminToken']) -> bool:
    """Follow ``v`` through the character tree and collect the best parses.

    Args:
        token: token handed to ``try_parse`` of each candidate termin.
        pars: parse attributes handed to ``try_parse``.
        v: key whose characters (from ``i0`` on) are followed in the tree.
        nod: node to start from.
        i0: index of the first character of ``v`` to consume.
        res: in/out holder, accessed through ``res.value`` (a list or
            ``None``); it keeps only the parses with the largest
            ``tokens_count``, at most one per termin.

    Returns:
        ``False`` when the path does not exist or the final node has no
        termins; otherwise ``len(v) > 1``.
    """
    i = i0
    while i < len(v): 
        ch = ord(v[i])
        if (nod.children is None): 
            return False
        nn = None
        wrapnn597 = RefOutArgWrapper(None)
        inoutres598 = Utils.tryGetValue(nod.children, ch, wrapnn597)
        nn = wrapnn597.value
        if (not inoutres598): 
            return False
        nod = nn
        i += 1
    vars0_ = nod.termins
    if (vars0_ is None or len(vars0_) == 0): 
        return False
    for t in vars0_: 
        ar = t.try_parse(token, pars)
        if (ar is not None): 
            ar.termin = t
            if (res.value is None): 
                res.value = list()
                res.value.append(ar)
            elif (ar.tokens_count > res.value[0].tokens_count): 
                # A longer parse replaces everything collected so far.
                res.value.clear()
                res.value.append(ar)
            elif (ar.tokens_count == res.value[0].tokens_count): 
                # Same length: append only if this termin is not present yet.
                j = 0
                j = 0
                while j < len(res.value): 
                    if (res.value[j].termin == ar.termin): 
                        break
                    j += 1
                if (j >= len(res.value)): 
                    res.value.append(ar)
        if (t.additional_vars is not None): 
            # Additional variants are parsed separately but reported under the owning termin t.
            for av in t.additional_vars: 
                ar = av.try_parse(token, pars)
                if (ar is None): 
                    continue
                ar.termin = t
                if (res.value is None): 
                    res.value = list()
                    res.value.append(ar)
                elif (ar.tokens_count > res.value[0].tokens_count): 
                    res.value.clear()
                    res.value.append(ar)
                elif (ar.tokens_count == res.value[0].tokens_count): 
                    j = 0
                    j = 0
                    while j < len(res.value): 
                        if (res.value[j].termin == ar.termin): 
                            break
                        j += 1
                    if (j >= len(res.value)): 
                        res.value.append(ar)
    return len(v) > 1
def _get_analyzer_data(self, type_name: str) -> 'AnalyzerData':
    """Return the persistent data of the analyzer registered for ``type_name``.

    Returns ``None`` when ``self.__m_anal_by_type`` has no such entry.
    """
    found = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.__m_anal_by_type, type_name, found):
        return found.value._persist_analizer_data
    return None
def normalize_preposition(prep: str) -> str:
    """Map a preposition to its normal form.

    Returns the value stored in ``LanguageHelper.__m_prep_norms`` for
    ``prep``, or ``prep`` itself when there is no entry.
    """
    normalized = RefOutArgWrapper(None)
    if not Utils.tryGetValue(LanguageHelper.__m_prep_norms, prep, normalized):
        return prep
    return normalized.value
def initialize() -> None:
    """Load the built-in organization ontologies (once).

    Reads the resources "Orgs_ru.dat", "Orgs_en.dat" and "Orgs_ua.dat",
    inflates each one, parses it as XML and turns every child of the root
    into an ``OrganizationReferent``. Items from the first two files go to
    ``OrgGlobal.GLOBAL_ORGS``, items from the third to
    ``OrgGlobal.GLOBAL_ORGS_UA``.

    Raises:
        The exception built by ``Utils.newException`` when a resource is
        missing.
    """
    # Guard: a non-None GLOBAL_ORGS means initialization has already run.
    if (OrgGlobal.GLOBAL_ORGS is not None): 
        return
    OrgGlobal.GLOBAL_ORGS = IntOntologyCollection()
    org0_ = None
    oi = None
    with ProcessorService.create_empty_processor() as geo_proc: 
        geo_proc.add_analyzer(GeoAnalyzer())
        # Cache of geo text -> GeoReferent, shared by all three files.
        geos = dict()
        for k in range(3): 
            lang = (MorphLang.RU if k == 0 else (MorphLang.EN if k == 1 else MorphLang.UA))
            name = ("Orgs_ru.dat" if k == 0 else ("Orgs_en.dat" if k == 1 else "Orgs_ua.dat"))
            dat = PullentiNerOrgInternalResourceHelper.get_bytes(name)
            if (dat is None): 
                # NOTE(review): "file" in the message is presumably a typo for "find".
                raise Utils.newException("Can't file resource file {0} in Organization analyzer".format(name), None)
            with MemoryStream(OrgItemTypeToken._deflate(dat)) as tmp: 
                tmp.position = 0
                xml0_ = None # new XmlDocument
                xml0_ = Utils.parseXmlFromStream(tmp)
                for x in xml0_.getroot(): 
                    org0_ = OrganizationReferent()
                    abbr = None
                    # Each child element maps to one slot; "abbr" is kept aside for a termin.
                    for xx in x: 
                        if (Utils.getXmlLocalName(xx) == "typ"): 
                            org0_.add_slot(OrganizationReferent.ATTR_TYPE, Utils.getXmlInnerText(xx), False, 0)
                        elif (Utils.getXmlLocalName(xx) == "nam"): 
                            org0_.add_slot(OrganizationReferent.ATTR_NAME, Utils.getXmlInnerText(xx), False, 0)
                        elif (Utils.getXmlLocalName(xx) == "epo"): 
                            org0_.add_slot(OrganizationReferent.ATTR_EPONYM, Utils.getXmlInnerText(xx), False, 0)
                        elif (Utils.getXmlLocalName(xx) == "prof"): 
                            org0_.add_slot(OrganizationReferent.ATTR_PROFILE, Utils.getXmlInnerText(xx), False, 0)
                        elif (Utils.getXmlLocalName(xx) == "abbr"): 
                            abbr = Utils.getXmlInnerText(xx)
                        elif (Utils.getXmlLocalName(xx) == "geo"): 
                            geo_ = None
                            wrapgeo1767 = RefOutArgWrapper(None)
                            inoutres1768 = Utils.tryGetValue(geos, Utils.getXmlInnerText(xx), wrapgeo1767)
                            geo_ = wrapgeo1767.value
                            if (not inoutres1768): 
                                # Not cached yet: run the geo processor on the text and
                                # accept the result only if it is exactly one GeoReferent.
                                ar = geo_proc.process(SourceOfAnalysis(Utils.getXmlInnerText(xx)), None, lang)
                                if (ar is not None and len(ar.entities) == 1 and (isinstance(ar.entities[0], GeoReferent))): 
                                    geo_ = (Utils.asObjectOrNull(ar.entities[0], GeoReferent))
                                    geos[Utils.getXmlInnerText(xx)] = geo_
                                else: 
                                    pass
                            if (geo_ is not None): 
                                org0_.add_slot(OrganizationReferent.ATTR_GEO, geo_, False, 0)
                    oi = org0_.create_ontology_item_ex(2, True, True)
                    if (oi is None): 
                        continue
                    if (abbr is not None): 
                        oi.termins.append(Termin(abbr, None, True))
                    # k == 2 is the "Orgs_ua.dat" pass.
                    # NOTE(review): GLOBAL_ORGS_UA is not created in this function — confirm it is initialized elsewhere.
                    if (k == 2): 
                        OrgGlobal.GLOBAL_ORGS_UA.add_item(oi)
                    else: 
                        OrgGlobal.GLOBAL_ORGS.add_item(oi)
    return
def get_names_for_shortname(shortname : str) -> typing.List['ShortnameVar']:
    """Return the full-name variants registered for a short name.

    Looks ``shortname`` up in ``ShortNameHelper.M_SHORTS_NAMES`` and gives
    back the stored list, or ``None`` when there is no entry.
    """
    variants = RefOutArgWrapper(None)
    if Utils.tryGetValue(ShortNameHelper.M_SHORTS_NAMES, shortname, variants):
        return variants.value
    return None
def find_by_spel(spel: str) -> 'ControlModelQuestion':
    """Return the question registered under ``spel`` or ``None``.

    ``ControlModelQuestion.__m_hash_by_spel`` maps the spelling to an
    index into ``ControlModelQuestion.ITEMS``.
    """
    index_ref = RefOutArgWrapper(0)
    if Utils.tryGetValue(ControlModelQuestion.__m_hash_by_spel, spel, index_ref):
        return ControlModelQuestion.ITEMS[index_ref.value]
    return None
def registerMorphInfo(self, var : 'MorphMiscInfo') -> 'MorphMiscInfo':
    """Intern a ``MorphMiscInfo`` by its string representation.

    Returns the instance already stored under ``str(var)`` if there is
    one; otherwise stores ``var`` in both ``self._m_vars_hash`` and
    ``self._m_vars`` and returns it.
    """
    key = str(var)
    known = RefOutArgWrapper(None)
    if Utils.tryGetValue(self._m_vars_hash, key, known):
        return known.value
    self._m_vars_hash[key] = var
    self._m_vars.append(var)
    return var
def __add_to_hash1(self, key : int, t : 'Termin') -> None:
    """Add termin ``t`` to the bucket of ``key`` in ``self.__m_hash1``.

    The bucket list is created on first use; a termin already present in
    the bucket is not added again.
    """
    bucket_ref = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.__m_hash1, key, bucket_ref):
        bucket = bucket_ref.value
    else:
        bucket = list()
        self.__m_hash1[key] = bucket
    if t not in bucket:
        bucket.append(t)
def get_case_after_preposition(prep: str) -> 'MorphCase':
    """Return the case stored for a preposition.

    Looks ``prep`` up in ``LanguageHelper.__m_prep_cases``; gives
    ``MorphCase.UNDEFINED`` when there is no entry.
    """
    case_ref = RefOutArgWrapper(None)
    if not Utils.tryGetValue(LanguageHelper.__m_prep_cases, prep, case_ref):
        return MorphCase.UNDEFINED
    return case_ref.value
def __addToHash1(self, key: int, t: 'Termin') -> None:
    """Register termin ``t`` under ``key`` in ``self.__m_hash1``.

    A new list is stored for a key seen for the first time; duplicates of
    ``t`` within one list are skipped.
    """
    holder = RefOutArgWrapper(None)
    known_key = Utils.tryGetValue(self.__m_hash1, key, holder)
    termins = holder.value
    if not known_key:
        termins = list()
        self.__m_hash1[key] = termins
    if t in termins:
        return
    termins.append(t)
def __create_referent(self, type_name: str, definition_: str) -> typing.List['Referent']:
    """Build a referent of ``type_name`` from a textual definition.

    Args:
        type_name: referent type; selects the analyzer in
            ``self.__m_anal_by_type``.
        definition_: text processed by ``self.__m_processor``; further
            fragments after ';' are merged into the main referent.

    Returns:
        A list whose first element is the registered main referent,
        followed by the entities of the analysis result whose ``tag`` is
        ``None``; or ``None`` when no analyzer or referent could be found.
    """
    analyzer = None
    wrapanalyzer2809 = RefOutArgWrapper(None)
    inoutres2810 = Utils.tryGetValue(self.__m_anal_by_type, type_name, wrapanalyzer2809)
    analyzer = wrapanalyzer2809.value
    if (not inoutres2810): 
        return None
    sf = SourceOfAnalysis(definition_)
    ar = self.__m_processor._process(sf, True, True, None, None)
    if (ar is None or ar.first_token is None): 
        return None
    r0 = ar.first_token.get_referent()
    t = None
    # A referent of another type on the first token is ignored.
    if (r0 is not None): 
        if (r0.type_name != type_name): 
            r0 = (None)
    if (r0 is not None): 
        t = ar.first_token
    else: 
        # Nothing usable on the first token: let the analyzer parse it as an ontology item.
        rt = analyzer.process_ontology_item(ar.first_token)
        if (rt is None): 
            return None
        r0 = rt.referent
        t = rt.end_token
    # Converted for-loop: the first pass skips the step "t = t.next0_".
    t = t.next0_
    first_pass3432 = True
    while True:
        if first_pass3432: first_pass3432 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_char(';') and t.next0_ is not None): 
            r1 = t.next0_.get_referent()
            if (r1 is None): 
                rt = analyzer.process_ontology_item(t.next0_)
                if (rt is None): 
                    continue
                t = rt.end_token
                r1 = rt.referent
            if (r1.type_name == type_name): 
                r0.merge_slots(r1, True)
                # Tagged so that the merged referent is left out of the result below.
                r1.tag = r0
    # NOTE(review): r0 is already dereferenced in the loop above before this None check.
    if (r0 is None): 
        return None
    r0.tag = r0
    r0 = analyzer._persist_analizer_data.register_referent(r0)
    self.__m_processor._create_res(ar.first_token.kit, ar, None, True)
    res = list()
    res.append(r0)
    for e0_ in ar.entities: 
        if (e0_.tag is None): 
            res.append(e0_)
    return res
def check_next(self, prep: str, cas: 'MorphCase') -> bool:
    """Tell whether ``cas`` is compatible with what ``self.nexts`` stores for ``prep``.

    ``prep`` is replaced by an empty string when it is ``None``. The
    result is ``True`` when the intersection (``&``) of the stored case
    and ``cas`` is not undefined; ``False`` when ``self.nexts`` is
    ``None`` or has no entry for the preposition.
    """
    if self.nexts is None:
        return False
    allowed = RefOutArgWrapper(None)
    if not Utils.tryGetValue(self.nexts, Utils.ifNotNull(prep, ""), allowed):
        return False
    common = allowed.value & cas
    return not common.is_undefined
def __check_corr_var(self, word: str, tn: 'MorphTreeNode', i: int) -> str:
    """Search the morphology tree for a word matching a pattern with '*'.

    Args:
        word: pattern; '*' stands for a single unknown character.
        tn: tree node to start from.
        i: index in ``word`` of the next character to consume.

    Returns:
        The first matching word found, or ``None``.
    """
    # Converted for-loop: the first pass skips the step "i += 1".
    first_pass2893 = True
    while True:
        if first_pass2893: first_pass2893 = False
        else: i += 1
        if (not (i <= len(word))): break
        if (tn.lazy_pos > 0): 
            self.__load_tree_node(tn)
        if (tn.rules is not None): 
            # Split the word at i: the head was consumed by the tree, the tail is matched against rules.
            word_begin = ""
            word_end = ""
            if (i > 0): 
                word_begin = word[0:0 + i]
            else: 
                word_end = word
            if (i < len(word)): 
                word_end = word[i:]
            else: 
                word_begin = word
            for r in tn.rules: 
                if (word_end in r.variants): 
                    return word_begin + word_end
                if (word_end.find('*') >= 0): 
                    for v in r.variants_key: 
                        if (len(v) == len(word_end)): 
                            # Character-wise compare; '*' in the tail matches any character.
                            j = 0
                            while j < len(v): 
                                if (word_end[j] == '*' or word_end[j] == v[j]): 
                                    pass
                                else: 
                                    break
                                j += 1
                            if (j >= len(v)): 
                                return word_begin + v
        if (tn.nodes is None or i >= len(word)): 
            break
        ch = ord(word[i])
        # 0x2A is the code of '*'.
        if (ch != (0x2A)): 
            wraptn32 = RefOutArgWrapper(None)
            inoutres33 = Utils.tryGetValue(tn.nodes, ch, wraptn32)
            tn = wraptn32.value
            if (inoutres33): 
                continue
            break
        if (tn.nodes is not None): 
            # Wildcard position: try every child, substituting its character for '*'.
            # Note that str.replace substitutes every '*' in the word, not only this one.
            for tnn in tn.nodes.items(): 
                ww = word.replace('*', chr(tnn[0]))
                res = self.__check_corr_var(ww, tnn[1], i + 1)
                if (res is not None): 
                    return res
        break
    return None
def find_termins_by_canonic_text(self, text : str) -> typing.List['Termin']:
    """Return the termins whose ``canonic_text`` equals ``text``.

    The index ``self.__m_hash_canonic`` (canonic text -> list of termins
    without duplicates) is built from ``self.termins`` on the first call.

    Returns:
        The stored list, or ``None`` when no termin has this text.
    """
    if self.__m_hash_canonic is None:
        self.__m_hash_canonic = dict()
        for termin in self.termins:
            canonic = termin.canonic_text
            group_ref = RefOutArgWrapper(None)
            if Utils.tryGetValue(self.__m_hash_canonic, canonic, group_ref):
                group = group_ref.value
            else:
                group = list()
                self.__m_hash_canonic[canonic] = group
            if termin not in group:
                group.append(termin)

    answer = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.__m_hash_canonic, text, answer):
        return answer.value
    return None
def attach_token(self, type_name: str, t: 'Token') -> typing.List['IntOntologyToken']:
    """Try to attach token ``t`` using the index for ``type_name``.

    Used internally. The index is built on demand via ``__init_hash``;
    ``None`` is returned when ``self.__m_hash`` has no entry for the type.
    """
    if self.__m_hash is None:
        self.__init_hash()
    per_type = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.__m_hash, type_name, per_type):
        return per_type.value.try_attach(t, None, False)
    return None
def _getAnalyzerData(self, type_name : str) -> 'AnalyzerData':
    """Return the persistent data of the analyzer for ``type_name``.

    Used internally.

    Args:
        type_name(str): key into ``self.__m_anal_by_type``.

    Returns:
        The analyzer's ``_persist_analizer_data``, or ``None`` when no
        analyzer is registered for the type.
    """
    analyzer_ref = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.__m_anal_by_type, type_name, analyzer_ref):
        return analyzer_ref.value._persist_analizer_data
    return None
def manageReferentLinks(self) -> None:
    """Resolve the pending referent links, then drop the lookup tables.

    For every pending reference in ``self.__m_refs`` the target is looked
    up first by ``pr.identity`` in ``self.__m_links2`` and, failing that,
    by ``pr.value`` in ``self.__m_links``. A found target is written back
    with ``pr.owner_referent.uploadSlot(pr.owner_slot, target)``;
    references that cannot be resolved are left untouched.

    Afterwards ``__m_links2``, ``__m_links`` and ``__m_refs`` are all
    reset to ``None``.
    """
    if self.__m_refs is not None:
        for pr in self.__m_refs:
            # The None guards must come BEFORE the lookup: the converted
            # code ran the lookup first, which defeats the guards when a
            # table is missing.
            if pr.identity is not None and self.__m_links2 is not None:
                by_identity = RefOutArgWrapper(None)
                if Utils.tryGetValue(self.__m_links2, pr.identity, by_identity):
                    pr.owner_referent.uploadSlot(pr.owner_slot, by_identity.value)
                    continue
            if self.__m_links is not None:
                by_value = RefOutArgWrapper(None)
                if Utils.tryGetValue(self.__m_links, pr.value, by_value):
                    pr.owner_referent.uploadSlot(pr.owner_slot, by_value.value)
    self.__m_links2 = None
    self.__m_links = self.__m_links2
    self.__m_refs = None
def __manage_reverce_nodes(root: 'MorphTreeNode', tn: 'MorphTreeNode', term: str) -> None:
    """Fill the reverse (word-tail) tree under ``root`` from the subtree ``tn``.

    Args:
        root: root of the reverse tree; children are keyed by character
            codes taken from the end of each word form.
        tn: node of the direct tree being visited (recursively).
        term: text accumulated on the path from the direct root to ``tn``.
    """
    if (tn.rules is not None): 
        for r in tn.rules: 
            for v in r.variants.items(): 
                # v is a (key, value) pair: v[0] is appended to term, v[1] is iterated below.
                wf = term + v[0]
                if (len(wf) <= MorphSerializeHelper.__min_tail_len): 
                    continue
                rtn = root
                lev = 0
                # Converted for-loop over lev: the first pass skips "lev += 1".
                first_pass2895 = True
                while True:
                    if first_pass2895: first_pass2895 = False
                    else: lev += 1
                    if (not (lev < MorphSerializeHelper.__max_tail_len)): break
                    # Walk the word form backwards, one character per level.
                    i = len(wf) - 1 - lev
                    if (i < 0): 
                        break
                    ch = ord(wf[i])
                    if (rtn.nodes is None): 
                        rtn.nodes = dict()
                    next0_ = None
                    wrapnext52 = RefOutArgWrapper(None)
                    inoutres53 = Utils.tryGetValue(rtn.nodes, ch, wrapnext52)
                    next0_ = wrapnext52.value
                    if (not inoutres53): 
                        next0_ = MorphTreeNode()
                        rtn.nodes[ch] = next0_
                    rtn = next0_
                    # Keep descending until the tail has the minimal length.
                    if ((lev + 1) < MorphSerializeHelper.__min_tail_len): 
                        continue
                    if (rtn.reverce_variants is None): 
                        rtn.reverce_variants = list()
                    for mrf in v[1]: 
                        has = False
                        # An equal variant already stored only gets its coefficient increased.
                        for mfv0 in rtn.reverce_variants: 
                            if (mfv0.compare(mrf)): 
                                mfv0.coef += 1
                                has = True
                                break
                        if (not has): 
                            mrf0 = MorphRuleVariant(mrf)
                            mrf0.coef = (1)
                            rtn.reverce_variants.append(mrf0)
                    # Variants are stored only at the first level reaching the minimal tail length.
                    break
    if (tn.nodes is not None): 
        for tch in tn.nodes.items(): 
            MorphSerializeHelper.__manage_reverce_nodes(root, tch[1], "{0}{1}".format(term, (chr(tch[0]))))