def compare_to(self, other: 'NextModelItem') -> int:
    """Three-way comparison: preposition first, then the ``__cas_rank()`` value.

    Args:
        other: the item to compare this one with.

    Returns:
        The result of ``Utils.compareStrings(self.preposition,
        other.preposition, False)`` when it is non-zero; otherwise -1, 1 or 0
        depending on whether this item's rank is lower than, higher than or
        equal to the other item's rank.
    """
    by_preposition = Utils.compareStrings(self.preposition, other.preposition, False)
    if by_preposition != 0:
        return by_preposition
    # Each rank is computed exactly once; the previous code re-evaluated
    # both ranks for every comparison (up to four calls).
    own_rank = self.__cas_rank()
    other_rank = other.__cas_rank()
    if own_rank < other_rank:
        return -1
    if own_rank > other_rank:
        return 1
    return 0
def __compare_to(self, other: 'ControlModelQuestion') -> int:
    """Three-way comparison: preposition first, then the ``__cas_rank()`` value.

    Args:
        other: the question to compare this one with.

    Returns:
        The non-zero result of ``Utils.compareStrings`` on the two
        ``preposition`` attributes (third argument ``False``) when they
        differ; otherwise -1, 1 or 0 according to the ordering of the two
        ``__cas_rank()`` values.
    """
    preposition_order = Utils.compareStrings(self.preposition, other.preposition, False)
    if preposition_order != 0:
        return preposition_order
    # Evaluate the ranks once each rather than once per comparison.
    left_rank = self.__cas_rank()
    right_rank = other.__cas_rank()
    if left_rank < right_rank:
        return -1
    if left_rank > right_rank:
        return 1
    return 0
def convert_outer_value_to_inner_value(self, outer_value: str) -> str:
    """Map an outer value to the inner value stored at the same index.

    An entry matches when ``Utils.compareStrings(entry, outer_value, True)``
    returns 0 for ``self.outer_values``, or when the entry at the same index
    of ``self.outer_valuesua`` is exactly equal to ``outer_value``.

    Args:
        outer_value: the value to translate; ``None`` is returned as ``None``.

    Returns:
        The element of ``self.inner_values`` at the index of the first match,
        or ``outer_value`` itself when nothing matches.
    """
    if outer_value is None:
        return None
    for idx, outer in enumerate(self.outer_values):
        # Both branches read inner_values[idx]; previously only the first
        # one was bounds-checked, so the second could raise IndexError when
        # inner_values is shorter than outer_valuesua.
        has_inner = idx < len(self.inner_values)
        if Utils.compareStrings(outer, outer_value, True) == 0 and has_inner:
            return self.inner_values[idx]
        if (has_inner and idx < len(self.outer_valuesua)
                and self.outer_valuesua[idx] == outer_value):
            return self.inner_values[idx]
    return outer_value
def convertOuterValueToInnerValue(self, outer_value: object) -> object:
    """Map an outer value to the inner value stored at the same index.

    The argument is first narrowed with ``Utils.asObjectOrNull(outer_value,
    str)``; when that yields ``None`` the argument is returned untouched.
    Otherwise an entry matches when ``Utils.compareStrings(entry, val, True)``
    returns 0 for ``self.outer_values``, or when the entry at the same index
    of ``self.outer_valuesua`` is exactly equal to the narrowed value.

    Args:
        outer_value: the value to translate.

    Returns:
        The element of ``self.inner_values`` at the index of the first match,
        or ``outer_value`` itself when nothing matches.
    """
    val = Utils.asObjectOrNull(outer_value, str)
    if val is None:
        return outer_value
    for idx, outer in enumerate(self.outer_values):
        # Guard both lookups into inner_values; the fallback branch used to
        # index it unchecked and could raise IndexError.
        has_inner = idx < len(self.inner_values)
        if Utils.compareStrings(outer, val, True) == 0 and has_inner:
            return self.inner_values[idx]
        if (has_inner and idx < len(self.outer_valuesua)
                and self.outer_valuesua[idx] == val):
            return self.inner_values[idx]
    return outer_value
def convert_inner_value_to_outer_value(self, inner_value: str, lang: 'MorphLang' = None) -> str:
    """Map an inner value to its outer representation, optionally localized.

    The first index at which ``Utils.compareStrings(self.inner_values[i],
    str(inner_value), True)`` returns 0, and which also exists in
    ``self.outer_values``, is used.

    Args:
        inner_value: the value to translate; ``None`` is returned as ``None``.
        lang: optional language; when ``lang.is_ua`` / ``lang.is_en`` is set
            and a non-None entry exists at the matching index of
            ``self.outer_valuesua`` / ``self.outer_valuesen``, that entry is
            preferred (``is_ua`` is checked first).

    Returns:
        The localized or default outer value, or ``inner_value`` itself when
        no entry matches.
    """
    if inner_value is None:
        return None
    wanted = str(inner_value)
    for pos, inner in enumerate(self.inner_values):
        if Utils.compareStrings(inner, wanted, True) != 0:
            continue
        if pos >= len(self.outer_values):
            continue
        if lang is not None:
            if lang.is_ua and pos < len(self.outer_valuesua):
                localized = self.outer_valuesua[pos]
                if localized is not None:
                    return localized
            if lang.is_en and pos < len(self.outer_valuesen):
                localized = self.outer_valuesen[pos]
                if localized is not None:
                    return localized
        return self.outer_values[pos]
    return inner_value
def __compareValues(self, val1: object, val2: object, use_can_be_equals_for_referents: bool) -> bool:
    """Decide whether two slot values are to be treated as equal.

    Args:
        val1: first value.
        val2: second value.
        use_can_be_equals_for_referents: when both values are ``Referent``
            instances that are not ``==``, delegate to
            ``val1.canBeEquals(..., Referent.EqualType.DIFFERENTTEXTS)``
            instead of reporting them as different.

    Returns:
        True when both are None, when ``val1 == val2``, when the referent
        check succeeds, or when both are strings for which
        ``Utils.compareStrings(val1, val2, True)`` returns 0.
    """
    if val1 is None:
        return val2 is None
    if val2 is None:
        return val1 is None
    if val1 == val2:
        return True
    both_referents = isinstance(val1, Referent) and isinstance(val2, Referent)
    if both_referents:
        if not use_can_be_equals_for_referents:
            return False
        other_referent = Utils.asObjectOrNull(val2, Referent)
        return val1.canBeEquals(other_referent, Referent.EqualType.DIFFERENTTEXTS)
    if not isinstance(val1, str):
        return val1 == val2
    if not isinstance(val2, str):
        return False
    return Utils.compareStrings(val1, val2, True) == 0
# Collect the textual content of a URI starting at token t0 into a UriItemToken.
#
# Parameters:
#   t0                 - first token to examine.
#   chars_             - characters accepted in addition to letters: a text token whose
#                        first source character is not alphabetic must have that
#                        character in chars_, otherwise scanning stops.
#   can_be_whitespaces - passed on to UriItemToken.attach_domain_name; when False the scan
#                        stops at the first token (other than t0) preceded by whitespace.
#
# Behaviour visible in the code:
#   * A domain name is tried first via attach_domain_name(t0, True, can_be_whitespaces);
#     a domain whose value is shorter than 3 characters makes the function return None,
#     otherwise scanning continues after the domain's end token.
#   * A token preceded by whitespace is only accepted when a domain was found, there is no
#     newline before it, and either the previous token is a hyphen / a '.' followed by a
#     2-character letter token, or a look-ahead finds a slash or one of the terms
#     HTM, HTML, SHTML, ASP, ASPX, JSP.
#   * NumberToken and TextToken source text is appended to a StringIO buffer; certain
#     single-token ReferentTokens are appended as well.
#   * open_char remembers an opening '(' or '[' so that ')' / ']' is accepted only after
#     its matching opener.
#   * If nothing, or nothing alphanumeric, was collected the domain token (possibly None)
#     is returned. A trailing '.' or '/' is dropped, the domain value is inserted in front,
#     a leading pair of backslashes is replaced with "//", and a result that equals "WWW"
#     (per Utils.compareStrings(..., True)) after removing a leading "//" yields None.
#
# NOTE(review): the source is flattened onto two physical lines, so the exact nesting of the
# branches above should be confirmed against a properly indented copy.
# NOTE(review): `i = 0` is assigned twice in a row before the alphanumeric scan; the second
# assignment is redundant.
def __attach_uri_content( t0: 'Token', chars_: str, can_be_whitespaces: bool = False) -> 'UriItemToken': txt = io.StringIO() t1 = t0 dom = UriItemToken.attach_domain_name(t0, True, can_be_whitespaces) if (dom is not None): if (len(dom.value) < 3): return None open_char = chr(0) t = t0 if (dom is not None): t = dom.end_token.next0_ first_pass3411 = True while True: if first_pass3411: first_pass3411 = False else: t = t.next0_ if (not (t is not None)): break if (t != t0 and t.is_whitespace_before): if (t.is_newline_before or not can_be_whitespaces): break if (dom is None): break if (t.previous.is_hiphen): pass elif (t.previous.is_char_of(",;")): break elif (t.previous.is_char('.') and t.chars.is_letter and t.length_char == 2): pass else: ok = False tt1 = t if (t.is_char_of("\\/")): tt1 = t.next0_ tt0 = tt1 first_pass3412 = True while True: if first_pass3412: first_pass3412 = False else: tt1 = tt1.next0_ if (not (tt1 is not None)): break if (tt1 != tt0 and tt1.is_whitespace_before): break if (isinstance(tt1, NumberToken)): continue if (not (isinstance(tt1, TextToken))): break term1 = tt1.term if (((term1 == "HTM" or term1 == "HTML" or term1 == "SHTML") or term1 == "ASP" or term1 == "ASPX") or term1 == "JSP"): ok = True break if (not tt1.chars.is_letter): if (tt1.is_char_of("\\/")): ok = True break if (not tt1.is_char_of(chars_)): break elif (not tt1.chars.is_latin_letter): break if (not ok): break if (isinstance(t, NumberToken)): nt = Utils.asObjectOrNull(t, NumberToken) print(nt.get_source_text(), end="", file=txt) t1 = t continue tt = Utils.asObjectOrNull(t, TextToken) if (tt is None): rt = Utils.asObjectOrNull(t, ReferentToken) if (rt is not None and rt.begin_token.is_value("РФ", None)): if (txt.tell() > 0 and Utils.getCharAtStringIO( txt, txt.tell() - 1) == '.'): print(rt.begin_token.get_source_text(), end="", file=txt) t1 = t continue if (rt is not None and rt.chars.is_latin_letter and rt.begin_token == rt.end_token): print(rt.begin_token.get_source_text(), 
end="", file=txt) t1 = t continue break src = tt.get_source_text() ch = src[0] if (not str.isalpha(ch)): if (chars_.find(ch) < 0): break if (ch == '(' or ch == '['): open_char = ch elif (ch == ')'): if (open_char != '('): break open_char = (chr(0)) elif (ch == ']'): if (open_char != '['): break open_char = (chr(0)) print(src, end="", file=txt) t1 = t if (txt.tell() == 0): return dom i = 0 i = 0 while i < txt.tell(): if (str.isalnum(Utils.getCharAtStringIO(txt, i))): break i += 1 if (i >= txt.tell()): return dom if (Utils.getCharAtStringIO(txt, txt.tell() - 1) == '.' or Utils.getCharAtStringIO(txt, txt.tell() - 1) == '/'): Utils.setLengthStringIO(txt, txt.tell() - 1) t1 = t1.previous if (dom is not None): Utils.insertStringIO(txt, 0, dom.value) tmp = Utils.toStringStringIO(txt) if (tmp.startswith("\\\\")): Utils.replaceStringIO(txt, "\\\\", "//") tmp = Utils.toStringStringIO(txt) if (tmp.startswith("//")): tmp = tmp[2:] if (Utils.compareStrings(tmp, "WWW", True) == 0): return None res = UriItemToken._new2706(t0, t1, Utils.toStringStringIO(txt)) return res
# Try to read a domain name (or a dotted IP address) starting at token t0.
#
# Parameters:
#   t0               - first token to examine.
#   check_           - when True the collected text is returned only if it passed one of
#                      the plausibility checks listed below.
#   can_be_whitspaces - when True a token preceded by whitespace (but not by a newline) may
#                      still be consumed if a look-ahead reaches a term recognised by
#                      UriItemToken.__m_std_groups; otherwise such a token ends the scan.
#
# Behaviour visible in the code:
#   * NumberToken source text and lower-cased TextToken terms are appended to a StringIO
#     buffer; a NumberToken without int_value, a non-text token, or a term whose first
#     character is neither alphabetic nor one of ". - _" ends the scan.
#   * ip_count counts DIGIT numbers in the range 0..255; the text is treated as an IP only
#     if is_ip was never cleared and exactly four such numbers were seen.
#   * On a '-' token, if the buffer so far equals "vk.com" (Utils.compareStrings with True),
#     the function returns a token for that text immediately.
#   * Returns None when the buffer is empty, starts with '.', contains two adjacent dots, or
#     contains no dot at all (a '.' as the very last character is trimmed first and t1
#     stepped back).
#   * With check_ set, the result is accepted when it is an IP, equals "localhost", or the
#     last token follows a '.' and is recognised by __m_std_groups; otherwise None.
#   * The returned token spans t0..t1 and carries the lower-cased buffer text.
#
# NOTE(review): the source is flattened onto two physical lines; which `if` the lone
# `else: is_ip = False` belongs to cannot be confirmed from this text.
# NOTE(review): `uri_` is assigned but never read in the visible code.
def attach_domain_name(t0: 'Token', check_: bool, can_be_whitspaces: bool) -> 'UriItemToken': txt = io.StringIO() t1 = t0 ip_count = 0 is_ip = True t = t0 first_pass3413 = True while True: if first_pass3413: first_pass3413 = False else: t = t.next0_ if (not (t is not None)): break if (t.is_whitespace_before and t != t0): ok = False if (not t.is_newline_before and can_be_whitspaces): tt1 = t first_pass3414 = True while True: if first_pass3414: first_pass3414 = False else: tt1 = tt1.next0_ if (not (tt1 is not None)): break if (tt1.is_char('.') or tt1.is_hiphen): continue if (tt1.is_whitespace_before): if (tt1.is_newline_before): break if (tt1.previous is not None and ((tt1.previous.is_char('.') or tt1.previous.is_hiphen))): pass else: break if (not (isinstance(tt1, TextToken))): break if (UriItemToken.__m_std_groups.try_parse( tt1, TerminParseAttr.NO) is not None): ok = True break if (not tt1.chars.is_latin_letter): break if (not ok): break if (isinstance(t, NumberToken)): nt = Utils.asObjectOrNull(t, NumberToken) if (nt.int_value is None): break print(nt.get_source_text(), end="", file=txt) t1 = t if (nt.typ == NumberSpellingType.DIGIT and nt.int_value >= 0 and (nt.int_value < 256)): ip_count += 1 else: is_ip = False continue tt = Utils.asObjectOrNull(t, TextToken) if (tt is None): break src = tt.term ch = src[0] if (not str.isalpha(ch)): if (".-_".find(ch) < 0): break if (ch != '.'): is_ip = False if (ch == '-'): if (Utils.compareStrings(Utils.toStringStringIO(txt), "vk.com", True) == 0): return UriItemToken._new2706( t0, t1, Utils.toStringStringIO(txt).lower()) else: is_ip = False print(src.lower(), end="", file=txt) t1 = t if (txt.tell() == 0): return None if (ip_count != 4): is_ip = False i = 0 points = 0 i = 0 while i < txt.tell(): if (Utils.getCharAtStringIO(txt, i) == '.'): if (i == 0): return None if (i >= (txt.tell() - 1)): Utils.setLengthStringIO(txt, txt.tell() - 1) t1 = t1.previous break if (Utils.getCharAtStringIO(txt, i - 1) == '.' 
or Utils.getCharAtStringIO(txt, i + 1) == '.'): return None points += 1 i += 1 if (points == 0): return None uri_ = Utils.toStringStringIO(txt) if (check_): ok = is_ip if (not is_ip): if (Utils.toStringStringIO(txt) == "localhost"): ok = True if (not ok and t1.previous is not None and t1.previous.is_char('.')): if (UriItemToken.__m_std_groups.try_parse( t1, TerminParseAttr.NO) is not None): ok = True if (not ok): return None return UriItemToken._new2706(t0, t1, Utils.toStringStringIO(txt).lower())
# Try to recognise a measurement unit starting at token t and return it as a UnitToken.
#
# Parameters:
#   t                   - first token to examine; None yields None.
#   add_units           - optional TerminCollection of extra units, consulted after the
#                         built-in checks have failed.
#   prev                - previously parsed UnitToken, if any; it enables the "В" prefix
#                         and the lower-case "м"/"m" = minute reading after a LENGTH unit.
#   parse_unknown_units - when True, a short unrecognised word may be returned as a unit
#                         with `unknown_name` set.
#
# Behaviour visible in the code:
#   * A leading slash/backslash, "НА", "OF", "PER" (or "В" when prev is given) marks the
#     unit as a divisor: the resulting pow0_ is negated. A multiplication character is
#     skipped.
#   * "КВ"/"КВАДР"/"КВАДРАТНЫЙ" set the power to 2 and "КУБ"/"КУБИЧ"/"КУБИЧЕСКИЙ" to 3
#     (an optional '.' after the abbreviation is skipped).
#   * "µ" parses the following unit recursively and looks for a MICRO-factor unit whose
#     Cyrillic name equals "мк" + name.
#   * Matches from UnitsHelper.TERMINS.try_parse_all are turned into candidates; the one
#     whose keyword starts furthest right wins, then the first candidate that is not
#     doubtful, then the first candidate.
#   * Degree signs ("º", "°", "<O>"-like sequences) give UGRADUS, refined to UGRADUSC or
#     UGRADUSF by a following C/С/ЦЕЛЬС… or F/ФАР… token; the literal source text "оС"/"oC"
#     gives UGRADUSC; '%' gives UPERCENT, or UALCO when followed by a term starting "ОБ".
#
# NOTE(review): the source is flattened onto three physical lines; confirm branch nesting
# against a properly indented copy before changing logic.
# NOTE(review): in the unknown-unit path, `t1 = t` is assigned again when the next token is
# '.', which is a no-op - possibly `t.next0_` was intended; verify.
# NOTE(review): `elif (t.length_char > 7)` comes after an earlier `return None` for
# `t.length_char > 5`, so it looks unreachable.
def try_parse(t: 'Token', add_units: 'TerminCollection', prev: 'UnitToken', parse_unknown_units: bool = False) -> 'UnitToken': if (t is None): return None t0 = t pow0__ = 1 is_neg = False if ((t.is_char_of("\\/") or t.is_value("НА", None) or t.is_value("OF", None)) or t.is_value("PER", None)): is_neg = True t = t.next0_ elif (t.is_value("В", None) and prev is not None): is_neg = True t = t.next0_ elif (MeasureHelper.is_mult_char(t)): t = t.next0_ tt = Utils.asObjectOrNull(t, TextToken) if (tt is None): return None if (tt.term == "КВ" or tt.term == "КВАДР" or tt.is_value("КВАДРАТНЫЙ", None)): pow0__ = 2 tt = (Utils.asObjectOrNull(tt.next0_, TextToken)) if (tt is not None and tt.is_char('.')): tt = (Utils.asObjectOrNull(tt.next0_, TextToken)) if (tt is None): return None elif (tt.term == "КУБ" or tt.term == "КУБИЧ" or tt.is_value("КУБИЧЕСКИЙ", None)): pow0__ = 3 tt = (Utils.asObjectOrNull(tt.next0_, TextToken)) if (tt is not None and tt.is_char('.')): tt = (Utils.asObjectOrNull(tt.next0_, TextToken)) if (tt is None): return None elif (tt.term == "µ"): res = UnitToken.try_parse(tt.next0_, add_units, prev, False) if (res is not None): for u in UnitsHelper.UNITS: if (u.factor == UnitsFactors.MICRO and Utils.compareStrings("мк" + u.name_cyr, res.unit.name_cyr, True) == 0): res.unit = u res.begin_token = tt res.pow0_ = pow0__ if (is_neg): res.pow0_ = (-pow0__) return res toks = UnitsHelper.TERMINS.try_parse_all(tt, TerminParseAttr.NO) if (toks is not None): if ((prev is not None and tt == t0 and len(toks) == 1) and t.is_whitespace_before): return None if (toks[0].begin_token == toks[0].end_token and tt.morph.class0_.is_preposition and (tt.whitespaces_after_count < 3)): if (NounPhraseHelper.try_parse( tt, NounPhraseParseAttr.PARSEPREPOSITION, 0, None) is not None): return None if (isinstance(tt.next0_, NumberToken)): if (tt.next0_.typ != NumberSpellingType.DIGIT): return None nex = UnitToken.try_parse(tt.next0_, add_units, None, False) if (nex is not None): return None if 
(toks[0].begin_token == toks[0].end_token and ((toks[0].begin_token.is_value("М", None) or toks[0].begin_token.is_value("M", None))) and toks[0].begin_token.chars.is_all_lower): if (prev is not None and prev.unit is not None and prev.unit.kind == MeasureKind.LENGTH): res = UnitToken._new1626(t0, toks[0].end_token, UnitsHelper.UMINUTE) res.pow0_ = pow0__ if (is_neg): res.pow0_ = (-pow0__) return res uts = list() for tok in toks: res = UnitToken._new1626( t0, tok.end_token, Utils.asObjectOrNull(tok.termin.tag, Unit)) res.pow0_ = pow0__ if (is_neg): res.pow0_ = (-pow0__) if (res.unit.base_multiplier == 1000000 and (isinstance(t0, TextToken)) and str.islower(t0.get_source_text()[0])): for u in UnitsHelper.UNITS: if (u.factor == UnitsFactors.MILLI and Utils.compareStrings( u.name_cyr, res.unit.name_cyr, True) == 0): res.unit = u break res.__correct() res.__check_doubt() uts.append(res) max0_ = 0 best = None for ut in uts: if (ut.keyword is not None): if (ut.keyword.begin_char >= max0_): max0_ = ut.keyword.begin_char best = ut if (best is not None): return best for ut in uts: if (not ut.is_doubt): return ut return uts[0] t1 = None if (t.is_char_of("º°")): t1 = t elif ((t.is_char('<') and t.next0_ is not None and t.next0_.next0_ is not None) and t.next0_.next0_.is_char('>') and ((t.next0_.is_value("О", None) or t.next0_.is_value("O", None) or (((isinstance(t.next0_, NumberToken)) and t.next0_.value == "0"))))): t1 = t.next0_.next0_ if (t1 is not None): res = UnitToken._new1626(t0, t1, UnitsHelper.UGRADUS) res.__check_doubt() t = t1.next0_ if (t is not None and t.is_comma): t = t.next0_ if (t is not None and t.is_value("ПО", None)): t = t.next0_ if (isinstance(t, TextToken)): vv = t.term if (vv == "C" or vv == "С" or vv.startswith("ЦЕЛЬС")): res.unit = UnitsHelper.UGRADUSC res.is_doubt = False res.end_token = t if (vv == "F" or vv.startswith("ФАР")): res.unit = UnitsHelper.UGRADUSF res.is_doubt = False res.end_token = t return res if ((isinstance(t, TextToken)) and 
((t.is_value("ОС", None) or t.is_value("OC", None)))): str0_ = t.get_source_text() if (str0_ == "оС" or str0_ == "oC"): res = UnitToken._new1738(t, t, UnitsHelper.UGRADUSC, False) return res if (t.is_char('%')): tt1 = t.next0_ if (tt1 is not None and tt1.is_char('(')): tt1 = tt1.next0_ if ((isinstance(tt1, TextToken)) and tt1.term.startswith("ОБ")): re = UnitToken._new1626(t, tt1, UnitsHelper.UALCO) if (re.end_token.next0_ is not None and re.end_token.next0_.is_char('.')): re.end_token = re.end_token.next0_ if (re.end_token.next0_ is not None and re.end_token.next0_.is_char(')') and t.next0_.is_char('(')): re.end_token = re.end_token.next0_ return re return UnitToken._new1626(t, t, UnitsHelper.UPERCENT) if (add_units is not None): tok = add_units.try_parse(t, TerminParseAttr.NO) if (tok is not None): res = UnitToken._new1741( t0, tok.end_token, Utils.asObjectOrNull(tok.termin.tag, UnitReferent)) if (tok.end_token.next0_ is not None and tok.end_token.next0_.is_char('.')): tok.end_token = tok.end_token.next0_ res.pow0_ = pow0__ if (is_neg): res.pow0_ = (-pow0__) res.__correct() return res if (not parse_unknown_units): return None if ((t.whitespaces_before_count > 2 or not t.chars.is_letter or t.length_char > 5) or not (isinstance(t, TextToken))): return None if (MiscHelper.can_be_start_of_sentence(t)): return None t1 = t if (t.next0_ is not None and t.next0_.is_char('.')): t1 = t ok = False if (t1.next0_ is None or t1.whitespaces_after_count > 2): ok = True elif (t1.next0_.is_comma or t1.next0_.is_char_of("\\/") or t1.next0_.is_table_control_char): ok = True elif (MeasureHelper.is_mult_char(t1.next0_)): ok = True if (not ok): return None mc = t.get_morph_class_in_dictionary() if (mc.is_undefined): pass elif (t.length_char > 7): return None res1 = UnitToken._new1742(t0, t1, pow0__, True) res1.unknown_name = t.get_source_text() res1.__correct() return res1
# Try to assemble a bank-requisites entity (BankDataReferent) from the tokens starting at t.
#
# Parameters:
#   t        - first token to examine; None yields None.
#   key_word - relaxes the early exits: without it, an organization or any other token seen
#              before the first requisite URI makes the function return None.
#
# Behaviour visible in the code:
#   * Commas, prepositions and slashes are skipped; a table-control token after the start
#     ends the scan.
#   * ORGANIZATION referents are remembered as the bank (org0_). An organization counts as a
#     bank when its "KIND" value, or that of up to three parent referents, equals "Bank"
#     (Utils.compareStrings with True), or when a "БАНК" keyword preceded it. An
#     organization that follows a "К/С" requisite and the word "В" is stored as cor_org.
#   * UriReferent tokens are collected while BankAnalyzer.__isBankReq(scheme) holds and the
#     scheme has not been seen yet; the first URI must be a bank requisite, and an "ИНН"
#     that is the first URI and is followed by a newline aborts with None.
#   * `empty` counts consecutive tokens that contribute nothing (GEO/ADDRESS referents,
#     unrecognised words); more than two of them end the scan.
#   * The result requires at least one collected URI, a scheme "Р/С" or "Л/С" among them,
#     and either two or more URIs or a bank organization.
#   * The returned ReferentToken spans t0..t1 and wraps a BankDataReferent with ATTR_ITEM
#     slots for each URI, ATTR_BANK / ATTR_CORBANK when known, plus any bank-requisite URIs
#     (with schemes not already collected) found in the slots of an ORGANIZATION referent
#     on the token preceding t0.
#
# NOTE(review): the source is flattened onto three physical lines; confirm branch nesting
# against a properly indented copy before changing logic.
# NOTE(review): `ok = False` is assigned near the end and never read in the visible code.
def __tryAttach(self, t : 'Token', key_word : bool) -> 'ReferentToken': if (t is None): return None t0 = t t1 = t uris_keys = None uris = None org0_ = None cor_org = None org_is_bank = False empty = 0 last_uri = None first_pass2749 = True while True: if first_pass2749: first_pass2749 = False else: t = t.next0_ if (not (t is not None)): break if (t.is_table_control_char and t != t0): break if (t.is_comma or t.morph.class0_.is_preposition or t.isCharOf("/\\")): continue bank_keyword = False if (t.isValue("ПОЛНЫЙ", None) and t.next0_ is not None and ((t.next0_.isValue("НАИМЕНОВАНИЕ", None) or t.next0_.isValue("НАЗВАНИЕ", None)))): t = t.next0_.next0_ if (t is None): break if (t.isValue("БАНК", None)): if ((isinstance(t, ReferentToken)) and t.getReferent().type_name == "ORGANIZATION"): bank_keyword = True tt = t.next0_ npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0) if (npt is not None): tt = npt.end_token.next0_ if (tt is not None and tt.isChar(':')): tt = tt.next0_ if (tt is not None): if (not bank_keyword): t = tt bank_keyword = True elif (tt.getReferent() is not None and tt.getReferent().type_name == "ORGANIZATION"): t = tt r = t.getReferent() if (r is not None and r.type_name == "ORGANIZATION"): is_bank = False kk = 0 rr = r while rr is not None and (kk < 4): is_bank = Utils.compareStrings(Utils.ifNotNull(rr.getStringValue("KIND"), ""), "Bank", True) == 0 if (is_bank): break rr = rr.parent_referent; kk += 1 if (not is_bank and bank_keyword): is_bank = True if (not is_bank and uris is not None and "ИНН" in uris_keys): return None if ((last_uri is not None and last_uri.scheme == "К/С" and t.previous is not None) and t.previous.isValue("В", None)): cor_org = r t1 = t elif (org0_ is None or ((not org_is_bank and is_bank))): org0_ = r t1 = t org_is_bank = is_bank if (is_bank): continue if (uris is None and not key_word): return None continue if (isinstance(r, UriReferent)): u = Utils.asObjectOrNull(r, UriReferent) if (uris is None): if (not 
BankAnalyzer.__isBankReq(u.scheme)): return None if (u.scheme == "ИНН" and t.is_newline_after): return None uris = list() uris_keys = list() else: if (not BankAnalyzer.__isBankReq(u.scheme)): break if (u.scheme in uris_keys): break if (u.scheme == "ИНН"): if (empty > 0): break uris_keys.append(u.scheme) uris.append(u) last_uri = u t1 = t empty = 0 continue elif (uris is None and not key_word and not org_is_bank): return None if (r is not None and ((r.type_name == "GEO" or r.type_name == "ADDRESS"))): empty += 1 continue if (isinstance(t, TextToken)): if (t.isValue("ПОЛНЫЙ", None) or t.isValue("НАИМЕНОВАНИЕ", None) or t.isValue("НАЗВАНИЕ", None)): pass elif (t.chars.is_letter): tok = BankAnalyzer.__m_ontology.tryParse(t, TerminParseAttr.NO) if (tok is not None): t = tok.end_token empty = 0 else: empty += 1 if (t.is_newline_before): nnn = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0) if (nnn is not None and nnn.end_token.next0_ is not None and nnn.end_token.next0_.isChar(':')): break if (uris is None): break if (empty > 2): break if (empty > 0 and t.isChar(':') and t.is_newline_after): break if (((isinstance(t, NumberToken)) and t.is_newline_before and t.next0_ is not None) and not t.next0_.chars.is_letter): break if (uris is None): return None if (not "Р/С" in uris_keys and not "Л/С" in uris_keys): return None ok = False if ((len(uris) < 2) and org0_ is None): return None bdr = BankDataReferent() for u in uris: bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0) if (org0_ is not None): bdr.addSlot(BankDataReferent.ATTR_BANK, org0_, False, 0) if (cor_org is not None): bdr.addSlot(BankDataReferent.ATTR_CORBANK, cor_org, False, 0) org0 = (None if t0.previous is None else t0.previous.getReferent()) if (org0 is not None and org0.type_name == "ORGANIZATION"): for s in org0.slots: if (isinstance(s.value, UriReferent)): u = Utils.asObjectOrNull(s.value, UriReferent) if (BankAnalyzer.__isBankReq(u.scheme)): if (not u.scheme in uris_keys): 
bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0) return ReferentToken(bdr, t0, t1)
def can_be_equals(self, obj: 'Referent', typ: 'ReferentsEqualType') -> bool:
    """Tell whether ``obj`` is a URI referent carrying the same value.

    Args:
        obj: the referent to compare with; anything that
            ``Utils.asObjectOrNull(obj, UriReferent)`` rejects is unequal.
        typ: comparison mode; not consulted by this implementation.

    Returns:
        True when ``Utils.compareStrings(self.value, obj.value, True)``
        returns 0, False otherwise.
    """
    other_uri = Utils.asObjectOrNull(obj, UriReferent)
    return (other_uri is not None
            and Utils.compareStrings(self.value, other_uri.value, True) == 0)
# Try to recognise one element of a transport description (a TransItemToken) at token t.
#
# Parameters:
#   t           - first token to examine; None yields None.
#   prev        - the previously parsed item, if any; many branches depend on prev.typ
#                 (NOUN, BRAND, NAME, MODEL) and prev.kind (AUTO, SHIP, SPACE).
#   after_conj  - enables the quoted-name and capitalised-name branches even without a
#                 preceding NOUN item.
#   attach_high - when True, ontology terms flagged is_doubt are accepted without the
#                 supporting-context check.
#
# Item kinds produced in the visible code (TransItemToken.Typs):
#   ORG    - an ORGANIZATION referent, optionally after a comma and "ПРИНАДЛЕЖАТЬ".
#   ROUTE  - a chain of state/city GeoReferents (hyphens, prepositions and conjunctions are
#            skipped); returned when it has more than one item or a route keyword was seen.
#   DATE   - a DateReferent with fewer than 3 whitespaces before it, optionally extended
#            over "В." / "ВЫП" / "ВЫПУСК".
#   NUMBER - after a number prefix or an ontology NUMBER term, via __attachRusAutoNumber or
#            _attachNumber.
#   NOUN / BRAND and other ontology types - from TransItemToken.M_ONTOLOGY, including terms
#            in brackets, after "С"/"C"/"ЗА", after "МАРКА", or at the end of an adjective
#            noun phrase.
#   CLASS  - "КЛАСС" followed by a bracketed sequence.
#   MODEL  - via __attachModel, or a short quoted string containing digits.
#   NAME   - a quoted string or a run of same-cased capitalised tokens, mainly after a
#            SHIP/SPACE noun or after a conjunction.
#   Returns None when nothing applies.
#
# NOTE(review): the source is flattened onto five physical lines; confirm branch nesting
# against a properly indented copy before changing logic.
# NOTE(review): in the bracketed-ontology branch `tok.begin_token = t` is assigned twice.
# NOTE(review): recursion uses both `TransItemToken.__TryParse` and `TransItemToken.tryParse`;
# the latter is not defined in the visible part of the file - verify it exists.
def __TryParse(t: 'Token', prev: 'TransItemToken', after_conj: bool, attach_high: bool = False) -> 'TransItemToken': if (t is None): return None t1 = t if (t1.isChar(',')): t1 = t1.next0_ if (t1 is not None and t1.isValue("ПРИНАДЛЕЖАТЬ", "НАЛЕЖАТИ")): t1 = t1.next0_ if (isinstance(t1, ReferentToken)): if (t1.getReferent().type_name == "ORGANIZATION"): return TransItemToken._new2521(t, t1, TransItemToken.Typs.ORG, t1.getReferent(), t1.morph) route = False if (t1 is not None and ((t1.isValue("СЛЕДОВАТЬ", "СЛІДУВАТИ") or t1.isValue("ВЫПОЛНЯТЬ", "ВИКОНУВАТИ")))): t1 = t1.next0_ route = True if (t1 is not None and t1.morph.class0_.is_preposition): t1 = t1.next0_ if (t1 is not None and ((t1.isValue("РЕЙС", None) or t1.isValue("МАРШРУТ", None)))): t1 = t1.next0_ route = True if (isinstance(t1, ReferentToken)): if (isinstance(t1.getReferent(), GeoReferent)): geo_ = Utils.asObjectOrNull(t1.getReferent(), GeoReferent) if (geo_.is_state or geo_.is_city): tit = TransItemToken._new2522(t, t1, TransItemToken.Typs.ROUTE, list()) tit.route_items.append(geo_) t1 = t1.next0_ first_pass3132 = True while True: if first_pass3132: first_pass3132 = False else: t1 = t1.next0_ if (not (t1 is not None)): break if (t1.is_hiphen): continue if (t1.morph.class0_.is_preposition or t1.morph.class0_.is_conjunction): continue geo_ = (Utils.asObjectOrNull(t1.getReferent(), GeoReferent)) if (geo_ is None): break if (not geo_.is_city and not geo_.is_state): break tit.route_items.append(geo_) tit.end_token = t1 if (len(tit.route_items) > 1 or route): return tit elif ((isinstance(t1.getReferent(), DateReferent)) and (t1.whitespaces_before_count < 3)): tit = TransItemToken._new2523(t, t1, TransItemToken.Typs.DATE, t1.getReferent()) if (t1.next0_ is not None): if (t1.next0_.isValue("В", None) and t1.next0_.next0_ is not None and t1.next0_.next0_.isChar('.')): tit.end_token = t1.next0_.next0_ elif (t1.next0_.isValue("ВЫП", None) or t1.next0_.isValue("ВЫПУСК", None)): tit.end_token = t1.next0_ if 
(t1.next0_.next0_ is not None and t1.next0_.next0_.isChar('.')): tit.end_token = t1.next0_.next0_ return tit if (isinstance(t, TextToken)): num = MiscHelper.checkNumberPrefix(t) if (num is not None): tit = TransItemToken.__attachRusAutoNumber(num) if (tit is None): tit = TransItemToken._attachNumber(num, False) if (tit is not None): tit.begin_token = t return tit tok = TransItemToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO) if (tok is None and ((t.isValue("С", None) or t.isValue("C", None) or t.isValue("ЗА", None)))): tok = TransItemToken.M_ONTOLOGY.tryParse( t.next0_, TerminParseAttr.NO) if (tok is None and BracketHelper.isBracket(t, True)): tok1 = TransItemToken.M_ONTOLOGY.tryParse( t.next0_, TerminParseAttr.NO) if (tok1 is not None and BracketHelper.isBracket( tok1.end_token.next0_, True)): tok = tok1 tok.begin_token = t tok.end_token = tok.end_token.next0_ tok.begin_token = t elif (tok1 is not None): tt = Utils.asObjectOrNull(tok1.termin, TransItemToken.TransTermin) if (tt.typ == TransItemToken.Typs.BRAND): tok = tok1 tok.begin_token = t if (tok is None and t.isValue("МАРКА", None)): res1 = TransItemToken.__TryParse(t.next0_, prev, after_conj, False) if (res1 is not None): if (res1.typ == TransItemToken.Typs.NAME or res1.typ == TransItemToken.Typs.BRAND): res1.begin_token = t res1.typ = TransItemToken.Typs.BRAND return res1 if (tok is not None): tt = Utils.asObjectOrNull(tok.termin, TransItemToken.TransTermin) if (tt.typ == TransItemToken.Typs.NUMBER): tit = TransItemToken.__attachRusAutoNumber( tok.end_token.next0_) if (tit is None): tit = TransItemToken._attachNumber( tok.end_token.next0_, False) if (tit is not None): tit.begin_token = t return tit else: return None if (tt.is_doubt and not attach_high): if (prev is None or prev.typ != TransItemToken.Typs.NOUN): if ((prev is not None and prev.typ == TransItemToken.Typs.BRAND and tt.typ == TransItemToken.Typs.BRAND) and Utils.compareStrings( tt.canonic_text, prev.value, True) == 0): pass else: return None if 
(tt.canonic_text == "СУДНО"): if ((((tok.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)): if (not BracketHelper.canBeStartOfSequence( tok.end_token.next0_, False, False)): return None tit = TransItemToken._new2524(tok.begin_token, tok.end_token, tt.kind, tt.typ, tt.is_doubt, tok.chars, tok.morph) tit.value = tt.canonic_text if (tit.typ == TransItemToken.Typs.NOUN): tit.value = tit.value.lower() else: tit.value = tit.value.upper() return tit if (tok is None and t.morph.class0_.is_adjective): npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0) if (npt is not None and len(npt.adjectives) > 0): state_ = None tt = t first_pass3133 = True while True: if first_pass3133: first_pass3133 = False else: tt = tt.next0_ if (not (tt is not None and tt.previous != npt.end_token)): break tok = TransItemToken.M_ONTOLOGY.tryParse( tt, TerminParseAttr.NO) if (tok is None and state_ is None): state_ = tt.kit.processReferent("GEO", tt) if (tok is not None and tok.end_token == npt.end_token): if ((tok.termin).typ == TransItemToken.Typs.NOUN): tit = TransItemToken._new2524( t, tok.end_token, (tok.termin).kind, TransItemToken.Typs.NOUN, (tok.termin).is_doubt, tok.chars, npt.morph) tit.value = (tok.termin).canonic_text.lower() tit.alt_value = npt.getNormalCaseText( None, False, MorphGender.UNDEFINED, False).lower() if (LanguageHelper.endsWithEx( tit.alt_value, "суд", "суда", None, None)): if (not BracketHelper.canBeStartOfSequence( tok.end_token.next0_, False, False)): continue if (state_ is not None): if ((state_.referent).is_state): tit.state = state_ return tit if (t is not None and t.isValue("КЛАСС", None) and t.next0_ is not None): br = BracketHelper.tryParse(t.next0_, BracketParseAttr.NO, 100) if (br is not None): return TransItemToken._new2526( t, br.end_token, TransItemToken.Typs.CLASS, MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO)) nt = Utils.asObjectOrNull(t, NumberToken) if (nt is not None): if (prev is None or nt.typ != 
NumberSpellingType.DIGIT): return None if (prev.typ == TransItemToken.Typs.BRAND): return TransItemToken.__attachModel(t, False, prev) else: return None res = TransItemToken.__attachRusAutoNumber(t) if ((res) is not None): if (not res.is_doubt): return res if (prev is not None and prev.typ == TransItemToken.Typs.NOUN and prev.kind == TransportKind.AUTO): return res if (prev is not None and ((prev.typ == TransItemToken.Typs.BRAND or prev.typ == TransItemToken.Typs.MODEL))): return res t1 = t if (t.is_hiphen): t1 = t.next0_ if (prev is not None and prev.typ == TransItemToken.Typs.BRAND and t1 is not None): tit = TransItemToken.__attachModel(t1, True, prev) if (tit is not None): tit.begin_token = t return tit if (prev is not None and ((prev.typ == TransItemToken.Typs.NOUN or after_conj))): br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100) if (br is not None and br.is_quote_type): tit = TransItemToken.tryParse(br.begin_token.next0_, prev, after_conj, False) if (tit is not None and tit.end_token.next0_ == br.end_token): if (not tit.is_doubt or tit.typ == TransItemToken.Typs.BRAND): tit.begin_token = br.begin_token tit.end_token = br.end_token return tit s = MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO) if (not Utils.isNullOrEmpty(s) and (len(s) < 30)): chars_ = 0 digs = 0 un = 0 for c in s: if (not Utils.isWhitespace(c)): if (str.isalpha(c)): chars_ += 1 elif (str.isdigit(c)): digs += 1 else: un += 1 if (((digs == 0 and un == 0 and t.next0_.chars.is_capital_upper)) or prev.kind == TransportKind.SHIP or prev.kind == TransportKind.SPACE): return TransItemToken._new2526( br.begin_token, br.end_token, TransItemToken.Typs.NAME, s) if (digs > 0 and (chars_ < 5)): return TransItemToken._new2526( br.begin_token, br.end_token, TransItemToken.Typs.MODEL, s.replace(" ", "")) if (prev is not None and (((prev.typ == TransItemToken.Typs.NOUN or prev.typ == TransItemToken.Typs.BRAND or prev.typ == TransItemToken.Typs.NAME) or prev.typ == TransItemToken.Typs.MODEL))): 
tit = TransItemToken.__attachModel( t, prev.typ != TransItemToken.Typs.NAME, prev) if (tit is not None): return tit if (((prev is not None and prev.typ == TransItemToken.Typs.NOUN and prev.kind == TransportKind.AUTO) and (isinstance(t, TextToken)) and t.chars.is_letter) and not t.chars.is_all_lower and (t.whitespaces_before_count < 2)): pt = t.kit.processReferent("PERSON", t) if (pt is None): tit = TransItemToken._new2529(t, t, TransItemToken.Typs.BRAND) tit.value = (t).term return tit if (((prev is not None and prev.typ == TransItemToken.Typs.NOUN and ((prev.kind == TransportKind.SHIP or prev.kind == TransportKind.SPACE)))) or after_conj): if (t.chars.is_capital_upper): ok = True npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0) if (npt is not None and len(npt.adjectives) > 0): ok = False else: rt = t.kit.processReferent("PERSON", t) if (rt is not None): ok = False if (t.getMorphClassInDictionary().is_proper_surname): if (not t.morph.case_.is_nominative): ok = False if (ok): t1 = t tt = t.next0_ while tt is not None: if (tt.whitespaces_before_count > 1): break if (tt.chars != t.chars): break tit = TransItemToken.tryParse(tt, None, False, False) if ((tit) is not None): break t1 = tt tt = tt.next0_ s = MiscHelper.getTextValue(t, t1, GetTextAttr.NO) if (s is not None): res1 = TransItemToken._new2530( t, t1, TransItemToken.Typs.NAME, True, s) if (not t1.is_newline_after): br = BracketHelper.tryParse( t1.next0_, BracketParseAttr.NO, 100) if (br is not None): res1.end_token = br.end_token res1.alt_value = res1.value res1.value = MiscHelper.getTextValueOfMetaToken( br, GetTextAttr.NO) return res1 return None