Пример #1
0
 def compare_to(self, other: 'NextModelItem') -> int:
     """Three-way comparison of this item with ``other``.

     Items are ordered first by ``preposition`` (compared through
     ``Utils.compareStrings`` with its third argument set to False) and,
     when that comparison reports a tie, by the value of ``__cas_rank()``.

     Returns:
         The non-zero result of the preposition comparison if there is
         one; otherwise -1, 1 or 0 according to how the two ranks compare.
     """
     by_preposition = Utils.compareStrings(self.preposition,
                                           other.preposition, False)
     if by_preposition != 0:
         return by_preposition
     # Prepositions tie: the rank decides the order.
     if self.__cas_rank() < other.__cas_rank():
         return -1
     return 1 if self.__cas_rank() > other.__cas_rank() else 0
 def __compare_to(self, other: 'ControlModelQuestion') -> int:
     """Return a negative, zero or positive ordering value against ``other``.

     The ``preposition`` attributes are compared first, using
     ``Utils.compareStrings`` with False as its third argument. Only when
     that yields 0 are the ``__cas_rank()`` values consulted: a smaller
     rank gives -1, a larger rank gives 1, and equal ranks give 0.
     """
     outcome = Utils.compareStrings(self.preposition, other.preposition,
                                    False)
     if outcome == 0:
         # Same preposition: break the tie on rank.
         if self.__cas_rank() < other.__cas_rank():
             outcome = -1
         elif self.__cas_rank() > other.__cas_rank():
             outcome = 1
     return outcome
Пример #3
0
 def convert_outer_value_to_inner_value(self, outer_value: str) -> str:
     """Translate an outer value into its inner counterpart.

     ``self.outer_values`` is scanned in order. Position ``k`` matches when
     either ``Utils.compareStrings(self.outer_values[k], outer_value, True)``
     is 0 and ``self.inner_values`` has an entry at ``k``, or
     ``self.outer_valuesua[k]`` exists and equals ``outer_value`` exactly.

     Returns:
         None for a None argument, ``self.inner_values[k]`` for the first
         matching position, or ``outer_value`` unchanged when nothing
         matches.
     """
     if outer_value is None:
         return None
     for pos, candidate in enumerate(self.outer_values):
         if (Utils.compareStrings(candidate, outer_value, True) == 0
                 and pos < len(self.inner_values)):
             return self.inner_values[pos]
         # Fall back to the parallel "ua" table for the same position.
         if (pos < len(self.outer_valuesua)
                 and self.outer_valuesua[pos] == outer_value):
             return self.inner_values[pos]
     return outer_value
Пример #4
0
 def convertOuterValueToInnerValue(self, outer_value: object) -> object:
     """Map an outer value onto the inner value stored at the same index.

     The argument is first passed through ``Utils.asObjectOrNull(..., str)``;
     when that yields None the argument is returned untouched. Otherwise
     ``self.outer_values`` is walked in order and the first index that
     matches, either through ``Utils.compareStrings(..., True) == 0`` with
     an ``inner_values`` entry present, or through exact equality with
     ``self.outer_valuesua`` at that index, selects the result from
     ``self.inner_values``.

     Returns:
         The matching inner value, or ``outer_value`` itself when there is
         no match.
     """
     text = Utils.asObjectOrNull(outer_value, str)
     if text is None:
         return outer_value
     for pos, known in enumerate(self.outer_values):
         hit_main = (Utils.compareStrings(known, text, True) == 0
                     and pos < len(self.inner_values))
         # The "ua" table is consulted only when the main table did not hit.
         hit_ua = (not hit_main and pos < len(self.outer_valuesua)
                   and self.outer_valuesua[pos] == text)
         if hit_main or hit_ua:
             return self.inner_values[pos]
     return outer_value
Пример #5
0
 def convert_inner_value_to_outer_value(self,
                                        inner_value: str,
                                        lang: 'MorphLang' = None) -> str:
     """Translate an inner value into the outer value shown for ``lang``.

     ``self.inner_values`` is searched for the first entry for which
     ``Utils.compareStrings(entry, str(inner_value), True)`` is 0 and which
     has a counterpart in ``self.outer_values``. For that index the result
     is chosen in this order:

     * ``self.outer_valuesua[index]`` when ``lang.is_ua`` is set and that
       entry exists and is not None;
     * ``self.outer_valuesen[index]`` when ``lang.is_en`` is set and that
       entry exists and is not None;
     * ``self.outer_values[index]`` otherwise.

     Returns:
         None for a None argument, the selected outer value on a match, or
         ``inner_value`` unchanged when nothing matches.
     """
     if inner_value is None:
         return None
     needle = str(inner_value)
     for pos, inner in enumerate(self.inner_values):
         if (Utils.compareStrings(inner, needle, True) != 0
                 or pos >= len(self.outer_values)):
             continue
         if lang is not None:
             # Prefer a language-specific spelling when one is available.
             if (lang.is_ua and pos < len(self.outer_valuesua)
                     and self.outer_valuesua[pos] is not None):
                 return self.outer_valuesua[pos]
             if (lang.is_en and pos < len(self.outer_valuesen)
                     and self.outer_valuesen[pos] is not None):
                 return self.outer_valuesen[pos]
         return self.outer_values[pos]
     return inner_value
Пример #6
0
 def __compareValues(self, val1: object, val2: object,
                     use_can_be_equals_for_referents: bool) -> bool:
     """Decide whether two values count as equal.

     Rules, applied in order:

     1. If either value is None, they are equal only when both are None.
     2. Values that compare equal with ``==`` are equal.
     3. Two ``Referent`` instances are compared with ``val1.canBeEquals``
        (using ``Referent.EqualType.DIFFERENTTEXTS``) when
        ``use_can_be_equals_for_referents`` is truthy; otherwise they are
        considered different.
     4. A string is equal only to another string for which
        ``Utils.compareStrings(val1, val2, True)`` returns 0.
     5. Anything else falls back to ``==``.
     """
     if val1 is None or val2 is None:
         return val1 is None and val2 is None
     if val1 == val2:
         return True
     if isinstance(val1, Referent) and isinstance(val2, Referent):
         if not use_can_be_equals_for_referents:
             return False
         return val1.canBeEquals(Utils.asObjectOrNull(val2, Referent),
                                 Referent.EqualType.DIFFERENTTEXTS)
     if isinstance(val1, str):
         if not isinstance(val2, str):
             return False
         return Utils.compareStrings(val1, val2, True) == 0
     return val1 == val2
Пример #7
0
 def __attach_uri_content(
         t0: 'Token',
         chars_: str,
         can_be_whitespaces: bool = False) -> 'UriItemToken':
     """Collect the textual content of a URI starting at token ``t0``.

     An optional leading domain name is parsed with
     ``UriItemToken.attach_domain_name``; scanning then continues from the
     token after it. Consecutive tokens are appended to a text buffer while
     they look like URI content.

     Args:
         t0: First token to examine.
         chars_: Non-letter characters that are permitted inside the
             content. A text token whose first source character is not
             alphabetic and not contained in this string ends the scan.
         can_be_whitespaces: When True, a whitespace gap (but never a line
             break) may be tolerated under the conditions documented in
             the loop below.

     Returns:
         * None when a domain was parsed but its value is shorter than
           3 characters, or when the final text (ignoring a leading "//")
           compares equal to "WWW" via ``Utils.compareStrings(..., True)``.
         * The parsed domain token (which may be None) when no further
           text was collected or the collected text contains no
           alphanumeric character.
         * Otherwise a token built by ``UriItemToken._new2706(t0, t1, text)``
           where ``t1`` is the last accepted token.
     """
     txt = io.StringIO()
     t1 = t0
     dom = UriItemToken.attach_domain_name(t0, True, can_be_whitespaces)
     if (dom is not None):
         if (len(dom.value) < 3):
             return None
     # chr(0) means "no bracket currently open".
     open_char = chr(0)
     t = t0
     if (dom is not None):
         # Content starts right after the domain name.
         t = dom.end_token.next0_
     # Emulated for-loop: the first pass uses the initial t, every later
     # pass (including after `continue`) advances to t.next0_.
     first_pass3411 = True
     while True:
         if first_pass3411: first_pass3411 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t != t0 and t.is_whitespace_before):
             # A whitespace gap normally ends the URI. It is tolerated only
             # when whitespaces are allowed, there is no line break, and a
             # domain was parsed.
             if (t.is_newline_before or not can_be_whitespaces):
                 break
             if (dom is None):
                 break
             if (t.previous.is_hiphen):
                 # Gap right after a hyphen: keep going.
                 pass
             elif (t.previous.is_char_of(",;")):
                 break
             elif (t.previous.is_char('.') and t.chars.is_letter
                   and t.length_char == 2):
                 # Gap after a dot, followed by a two-character letter token:
                 # keep going.
                 pass
             else:
                 # Look ahead over the following whitespace-free run of
                 # tokens; continue only if it contains one of the listed
                 # page extensions or a slash/backslash.
                 ok = False
                 tt1 = t
                 if (t.is_char_of("\\/")):
                     tt1 = t.next0_
                 tt0 = tt1
                 first_pass3412 = True
                 while True:
                     if first_pass3412: first_pass3412 = False
                     else: tt1 = tt1.next0_
                     if (not (tt1 is not None)): break
                     if (tt1 != tt0 and tt1.is_whitespace_before):
                         break
                     if (isinstance(tt1, NumberToken)):
                         continue
                     if (not (isinstance(tt1, TextToken))):
                         break
                     term1 = tt1.term
                     if (((term1 == "HTM" or term1 == "HTML" or term1
                           == "SHTML") or term1 == "ASP" or term1 == "ASPX")
                             or term1 == "JSP"):
                         ok = True
                         break
                     if (not tt1.chars.is_letter):
                         if (tt1.is_char_of("\\/")):
                             ok = True
                             break
                         if (not tt1.is_char_of(chars_)):
                             break
                     elif (not tt1.chars.is_latin_letter):
                         break
                 if (not ok):
                     break
         if (isinstance(t, NumberToken)):
             # Numbers are always accepted, using their source spelling.
             nt = Utils.asObjectOrNull(t, NumberToken)
             print(nt.get_source_text(), end="", file=txt)
             t1 = t
             continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is None):
             # Not a text token: only two kinds of ReferentToken are accepted.
             rt = Utils.asObjectOrNull(t, ReferentToken)
             if (rt is not None and rt.begin_token.is_value("РФ", None)):
                 # A referent starting with "РФ" is accepted when the buffer
                 # currently ends with a dot.
                 if (txt.tell() > 0 and Utils.getCharAtStringIO(
                         txt,
                         txt.tell() - 1) == '.'):
                     print(rt.begin_token.get_source_text(),
                           end="",
                           file=txt)
                     t1 = t
                     continue
             if (rt is not None and rt.chars.is_latin_letter
                     and rt.begin_token == rt.end_token):
                 # A single-token Latin-letter referent is accepted as text.
                 print(rt.begin_token.get_source_text(), end="", file=txt)
                 t1 = t
                 continue
             break
         src = tt.get_source_text()
         ch = src[0]
         if (not str.isalpha(ch)):
             if (chars_.find(ch) < 0):
                 break
             # Track bracket balance: a closing bracket must match the
             # bracket that is currently open, otherwise the scan stops.
             if (ch == '(' or ch == '['):
                 open_char = ch
             elif (ch == ')'):
                 if (open_char != '('):
                     break
                 open_char = (chr(0))
             elif (ch == ']'):
                 if (open_char != '['):
                     break
                 open_char = (chr(0))
         print(src, end="", file=txt)
         t1 = t
     # txt.tell() is used throughout as the current length of the buffer.
     if (txt.tell() == 0):
         return dom
     # NOTE(review): the first of the two `i = 0` assignments is redundant.
     i = 0
     i = 0
     # Find the first alphanumeric character; if there is none the collected
     # text is discarded and only the domain result is returned.
     while i < txt.tell():
         if (str.isalnum(Utils.getCharAtStringIO(txt, i))):
             break
         i += 1
     if (i >= txt.tell()):
         return dom
     # Drop a single trailing '.' or '/' and step the end token back.
     # NOTE(review): t1.previous is taken without a None check.
     if (Utils.getCharAtStringIO(txt,
                                 txt.tell() - 1) == '.'
             or Utils.getCharAtStringIO(txt,
                                        txt.tell() - 1) == '/'):
         Utils.setLengthStringIO(txt, txt.tell() - 1)
         t1 = t1.previous
     if (dom is not None):
         # Put the domain value in front of the collected content.
         Utils.insertStringIO(txt, 0, dom.value)
     tmp = Utils.toStringStringIO(txt)
     if (tmp.startswith("\\\\")):
         # Text starting with two backslashes: replace them with "//"
         # in the buffer itself.
         Utils.replaceStringIO(txt, "\\\\", "//")
         tmp = Utils.toStringStringIO(txt)
     if (tmp.startswith("//")):
         # The leading "//" is stripped only from the copy used for the
         # "WWW" check below, not from the returned text.
         tmp = tmp[2:]
     if (Utils.compareStrings(tmp, "WWW", True) == 0):
         return None
     res = UriItemToken._new2706(t0, t1, Utils.toStringStringIO(txt))
     return res
Пример #8
0
 def attach_domain_name(t0: 'Token', check_: bool,
                        can_be_whitspaces: bool) -> 'UriItemToken':
     """Try to read a domain name or dotted numeric address starting at ``t0``.

     Number tokens and text tokens are appended (text lower-cased) to a
     buffer. A text token is accepted when its term starts with a letter or
     with one of the characters ``.``, ``-``, ``_``.

     Args:
         t0: First token to examine.
         check_: When True the result must additionally be validated (see
             the final block) or None is returned.
         can_be_whitspaces: When True, a whitespace gap without a line break
             may be bridged if the look-ahead finds a term known to
             ``UriItemToken.__m_std_groups``.

     Returns:
         A token built by ``UriItemToken._new2706(t0, t1, text)`` with the
         lower-cased text, or None when nothing was collected, the text has
         no dot, starts with a dot, contains two adjacent dots, or fails the
         ``check_`` validation.
     """
     txt = io.StringIO()
     t1 = t0
     # ip_count counts number tokens that are digit-spelled and in 0..255;
     # is_ip is cleared as soon as anything non-IP-like is seen.
     ip_count = 0
     is_ip = True
     t = t0
     # Emulated for-loop: later passes advance to t.next0_.
     first_pass3413 = True
     while True:
         if first_pass3413: first_pass3413 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_whitespace_before and t != t0):
             # Whitespace ends the name unless the look-ahead below succeeds.
             ok = False
             if (not t.is_newline_before and can_be_whitspaces):
                 tt1 = t
                 first_pass3414 = True
                 while True:
                     if first_pass3414: first_pass3414 = False
                     else: tt1 = tt1.next0_
                     if (not (tt1 is not None)): break
                     if (tt1.is_char('.') or tt1.is_hiphen):
                         continue
                     if (tt1.is_whitespace_before):
                         if (tt1.is_newline_before):
                             break
                         # A further gap is allowed only directly after a
                         # dot or a hyphen.
                         if (tt1.previous is not None
                                 and ((tt1.previous.is_char('.')
                                       or tt1.previous.is_hiphen))):
                             pass
                         else:
                             break
                     if (not (isinstance(tt1, TextToken))):
                         break
                     if (UriItemToken.__m_std_groups.try_parse(
                             tt1, TerminParseAttr.NO) is not None):
                         ok = True
                         break
                     if (not tt1.chars.is_latin_letter):
                         break
             if (not ok):
                 break
         if (isinstance(t, NumberToken)):
             nt = Utils.asObjectOrNull(t, NumberToken)
             if (nt.int_value is None):
                 break
             print(nt.get_source_text(), end="", file=txt)
             t1 = t
             if (nt.typ == NumberSpellingType.DIGIT and nt.int_value >= 0
                     and (nt.int_value < 256)):
                 ip_count += 1
             else:
                 is_ip = False
             continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is None):
             break
         src = tt.term
         ch = src[0]
         if (not str.isalpha(ch)):
             if (".-_".find(ch) < 0):
                 break
             if (ch != '.'):
                 is_ip = False
             if (ch == '-'):
                 # Special case: if the buffer so far is "vk.com", stop
                 # before the hyphen and return that as the domain.
                 if (Utils.compareStrings(Utils.toStringStringIO(txt),
                                          "vk.com", True) == 0):
                     return UriItemToken._new2706(
                         t0, t1,
                         Utils.toStringStringIO(txt).lower())
         else:
             is_ip = False
         print(src.lower(), end="", file=txt)
         t1 = t
     # txt.tell() is used as the current length of the buffer.
     if (txt.tell() == 0):
         return None
     if (ip_count != 4):
         is_ip = False
     i = 0
     points = 0
     i = 0
     # Validate and count the dots in the collected text.
     while i < txt.tell():
         if (Utils.getCharAtStringIO(txt, i) == '.'):
             if (i == 0):
                 return None
             if (i >= (txt.tell() - 1)):
                 # Trailing dot: drop it, step the end token back and stop.
                 Utils.setLengthStringIO(txt, txt.tell() - 1)
                 t1 = t1.previous
                 break
             if (Utils.getCharAtStringIO(txt, i - 1) == '.'
                     or Utils.getCharAtStringIO(txt, i + 1) == '.'):
                 return None
             points += 1
         i += 1
     if (points == 0):
         return None
     # NOTE(review): uri_ is assigned but not used below.
     uri_ = Utils.toStringStringIO(txt)
     if (check_):
         ok = is_ip
         if (not is_ip):
             # NOTE(review): text without any dot was already rejected above
             # (points == 0), so this "localhost" comparison cannot succeed.
             if (Utils.toStringStringIO(txt) == "localhost"):
                 ok = True
         # Otherwise accept when the last token follows a dot and is a term
         # known to __m_std_groups.
         if (not ok and t1.previous is not None
                 and t1.previous.is_char('.')):
             if (UriItemToken.__m_std_groups.try_parse(
                     t1, TerminParseAttr.NO) is not None):
                 ok = True
         if (not ok):
             return None
     return UriItemToken._new2706(t0, t1,
                                  Utils.toStringStringIO(txt).lower())
Пример #9
0
 def try_parse(t: 'Token',
               add_units: 'TerminCollection',
               prev: 'UnitToken',
               parse_unknown_units: bool = False) -> 'UnitToken':
     """Try to recognise a measurement unit starting at token ``t``.

     Args:
         t: First token to examine; None yields None.
         add_units: Optional extra term collection consulted when the
             built-in terms (``UnitsHelper.TERMINS``) do not match.
         prev: Previously parsed unit token, or None. It influences several
             decisions below ("В" prefix, whitespace rejection, the
             "м"/"M" after a length unit case).
         parse_unknown_units: When True, a short letter token that matched
             nothing may still be returned as an unknown unit.

     Returns:
         A ``UnitToken`` spanning the recognised unit, with ``pow0_`` set to
         the detected power (negated after a "per"-style prefix) on the
         paths that assign it, or None when nothing acceptable was found.
     """
     if (t is None):
         return None
     t0 = t
     pow0__ = 1
     is_neg = False
     # Optional prefix: a slash/backslash or the words "НА"/"OF"/"PER" mark a
     # negative power; "В" does the same only when a previous unit exists.
     if ((t.is_char_of("\\/") or t.is_value("НА", None)
          or t.is_value("OF", None)) or t.is_value("PER", None)):
         is_neg = True
         t = t.next0_
     elif (t.is_value("В", None) and prev is not None):
         is_neg = True
         t = t.next0_
     elif (MeasureHelper.is_mult_char(t)):
         # A token accepted by is_mult_char is skipped without changing
         # the sign.
         t = t.next0_
     tt = Utils.asObjectOrNull(t, TextToken)
     if (tt is None):
         return None
     if (tt.term == "КВ" or tt.term == "КВАДР"
             or tt.is_value("КВАДРАТНЫЙ", None)):
         # "Square" prefix: power 2; an optional following dot is skipped.
         pow0__ = 2
         tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is not None and tt.is_char('.')):
             tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is None):
             return None
     elif (tt.term == "КУБ" or tt.term == "КУБИЧ"
           or tt.is_value("КУБИЧЕСКИЙ", None)):
         # "Cubic" prefix: power 3; an optional following dot is skipped.
         pow0__ = 3
         tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is not None and tt.is_char('.')):
             tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is None):
             return None
     elif (tt.term == "µ"):
         # Micro sign: parse the following unit recursively, then look for a
         # MICRO-factor unit whose Cyrillic name is "мк" + <unit name> and
         # equals the parsed unit's name. If none is found, processing
         # falls through to the generic lookup below.
         res = UnitToken.try_parse(tt.next0_, add_units, prev, False)
         if (res is not None):
             for u in UnitsHelper.UNITS:
                 if (u.factor == UnitsFactors.MICRO
                         and Utils.compareStrings("мк" + u.name_cyr,
                                                  res.unit.name_cyr, True)
                         == 0):
                     res.unit = u
                     res.begin_token = tt
                     res.pow0_ = pow0__
                     if (is_neg):
                         res.pow0_ = (-pow0__)
                     return res
     toks = UnitsHelper.TERMINS.try_parse_all(tt, TerminParseAttr.NO)
     if (toks is not None):
         # A single match directly at t0 that is separated by whitespace
         # from a previous unit is rejected.
         if ((prev is not None and tt == t0 and len(toks) == 1)
                 and t.is_whitespace_before):
             return None
         # A one-token match that is also a preposition is rejected when it
         # starts a noun phrase, precedes a non-digit number, or is itself
         # followed by a parsable unit.
         if (toks[0].begin_token == toks[0].end_token
                 and tt.morph.class0_.is_preposition
                 and (tt.whitespaces_after_count < 3)):
             if (NounPhraseHelper.try_parse(
                     tt, NounPhraseParseAttr.PARSEPREPOSITION, 0, None)
                     is not None):
                 return None
             if (isinstance(tt.next0_, NumberToken)):
                 if (tt.next0_.typ != NumberSpellingType.DIGIT):
                     return None
             nex = UnitToken.try_parse(tt.next0_, add_units, None, False)
             if (nex is not None):
                 return None
         # Lower-case single "м"/"m" right after a LENGTH unit is returned
         # as UnitsHelper.UMINUTE.
         if (toks[0].begin_token == toks[0].end_token
                 and ((toks[0].begin_token.is_value("М", None)
                       or toks[0].begin_token.is_value("M", None)))
                 and toks[0].begin_token.chars.is_all_lower):
             if (prev is not None and prev.unit is not None
                     and prev.unit.kind == MeasureKind.LENGTH):
                 res = UnitToken._new1626(t0, toks[0].end_token,
                                          UnitsHelper.UMINUTE)
                 res.pow0_ = pow0__
                 if (is_neg):
                     res.pow0_ = (-pow0__)
                 return res
         # Build one candidate per matched term.
         uts = list()
         for tok in toks:
             res = UnitToken._new1626(
                 t0, tok.end_token,
                 Utils.asObjectOrNull(tok.termin.tag, Unit))
             res.pow0_ = pow0__
             if (is_neg):
                 res.pow0_ = (-pow0__)
             # A unit with base multiplier 1000000 whose source text starts
             # with a lower-case letter is replaced by the MILLI unit of
             # the same Cyrillic name, if there is one.
             if (res.unit.base_multiplier == 1000000
                     and (isinstance(t0, TextToken))
                     and str.islower(t0.get_source_text()[0])):
                 for u in UnitsHelper.UNITS:
                     if (u.factor == UnitsFactors.MILLI
                             and Utils.compareStrings(
                                 u.name_cyr, res.unit.name_cyr, True) == 0):
                         res.unit = u
                         break
             res.__correct()
             res.__check_doubt()
             uts.append(res)
         # Selection order: the candidate whose keyword has the largest
         # begin_char (later candidates win ties), else the first candidate
         # that is not doubtful, else the first candidate.
         max0_ = 0
         best = None
         for ut in uts:
             if (ut.keyword is not None):
                 if (ut.keyword.begin_char >= max0_):
                     max0_ = ut.keyword.begin_char
                     best = ut
         if (best is not None):
             return best
         for ut in uts:
             if (not ut.is_doubt):
                 return ut
         return uts[0]
     # Degree mark: a "º"/"°" character, or the three-token sequence
     # "<", "О"/"O"/number 0, ">".
     t1 = None
     if (t.is_char_of("º°")):
         t1 = t
     elif ((t.is_char('<') and t.next0_ is not None
            and t.next0_.next0_ is not None)
           and t.next0_.next0_.is_char('>') and
           ((t.next0_.is_value("О", None) or t.next0_.is_value("O", None) or
             (((isinstance(t.next0_, NumberToken))
               and t.next0_.value == "0"))))):
         t1 = t.next0_.next0_
     if (t1 is not None):
         res = UnitToken._new1626(t0, t1, UnitsHelper.UGRADUS)
         res.__check_doubt()
         # Optionally refine to UGRADUSC / UGRADUSF from the following word,
         # skipping an optional comma and the word "ПО".
         t = t1.next0_
         if (t is not None and t.is_comma):
             t = t.next0_
         if (t is not None and t.is_value("ПО", None)):
             t = t.next0_
         if (isinstance(t, TextToken)):
             vv = t.term
             if (vv == "C" or vv == "С" or vv.startswith("ЦЕЛЬС")):
                 res.unit = UnitsHelper.UGRADUSC
                 res.is_doubt = False
                 res.end_token = t
             if (vv == "F" or vv.startswith("ФАР")):
                 res.unit = UnitsHelper.UGRADUSF
                 res.is_doubt = False
                 res.end_token = t
         return res
     # Literal "оС" / "oC" (lower-case o followed by upper-case C, in Cyrillic
     # or Latin) is returned as UGRADUSC.
     if ((isinstance(t, TextToken))
             and ((t.is_value("ОС", None) or t.is_value("OC", None)))):
         str0_ = t.get_source_text()
         if (str0_ == "оС" or str0_ == "oC"):
             res = UnitToken._new1738(t, t, UnitsHelper.UGRADUSC, False)
             return res
     if (t.is_char('%')):
         # "%" followed (optionally inside parentheses) by a word starting
         # with "ОБ" gives UALCO; a plain "%" gives UPERCENT.
         tt1 = t.next0_
         if (tt1 is not None and tt1.is_char('(')):
             tt1 = tt1.next0_
         if ((isinstance(tt1, TextToken)) and tt1.term.startswith("ОБ")):
             re = UnitToken._new1626(t, tt1, UnitsHelper.UALCO)
             if (re.end_token.next0_ is not None
                     and re.end_token.next0_.is_char('.')):
                 re.end_token = re.end_token.next0_
             if (re.end_token.next0_ is not None
                     and re.end_token.next0_.is_char(')')
                     and t.next0_.is_char('(')):
                 re.end_token = re.end_token.next0_
             return re
         return UnitToken._new1626(t, t, UnitsHelper.UPERCENT)
     if (add_units is not None):
         # Caller-supplied units.
         tok = add_units.try_parse(t, TerminParseAttr.NO)
         if (tok is not None):
             res = UnitToken._new1741(
                 t0, tok.end_token,
                 Utils.asObjectOrNull(tok.termin.tag, UnitReferent))
             # NOTE(review): tok.end_token is extended over a trailing dot
             # only after res was created from the old value; res.end_token
             # is not updated here, so confirm whether that is intended.
             if (tok.end_token.next0_ is not None
                     and tok.end_token.next0_.is_char('.')):
                 tok.end_token = tok.end_token.next0_
             res.pow0_ = pow0__
             if (is_neg):
                 res.pow0_ = (-pow0__)
             res.__correct()
             return res
     if (not parse_unknown_units):
         return None
     # Unknown unit: only a text token of letters, at most 5 characters long,
     # with at most 2 whitespaces before it, that cannot start a sentence.
     if ((t.whitespaces_before_count > 2 or not t.chars.is_letter
          or t.length_char > 5) or not (isinstance(t, TextToken))):
         return None
     if (MiscHelper.can_be_start_of_sentence(t)):
         return None
     t1 = t
     # NOTE(review): this assignment is a no-op, since t1 already equals t;
     # possibly the following dot token was meant. Confirm before changing.
     if (t.next0_ is not None and t.next0_.is_char('.')):
         t1 = t
     # The candidate must be followed by nothing, by more than 2
     # whitespaces, by a comma, slash/backslash or table control character,
     # or by a token accepted by is_mult_char.
     ok = False
     if (t1.next0_ is None or t1.whitespaces_after_count > 2):
         ok = True
     elif (t1.next0_.is_comma or t1.next0_.is_char_of("\\/")
           or t1.next0_.is_table_control_char):
         ok = True
     elif (MeasureHelper.is_mult_char(t1.next0_)):
         ok = True
     if (not ok):
         return None
     mc = t.get_morph_class_in_dictionary()
     if (mc.is_undefined):
         pass
     # NOTE(review): length_char > 5 was already rejected above, so this
     # branch cannot return here.
     elif (t.length_char > 7):
         return None
     res1 = UnitToken._new1742(t0, t1, pow0__, True)
     res1.unknown_name = t.get_source_text()
     res1.__correct()
     return res1
Пример #10
0
 def __tryAttach(self, t : 'Token', key_word : bool) -> 'ReferentToken':
     """Try to assemble a ``BankDataReferent`` from the tokens starting at ``t``.

     The token chain is scanned for ``UriReferent`` items whose scheme passes
     ``BankAnalyzer.__isBankReq`` and for "ORGANIZATION" referents, which
     become the bank (``org0_``) or the correspondent bank (``cor_org``).

     Args:
         t: First token to examine; None yields None.
         key_word: When False, the scan gives up (returns None) as soon as
             it meets something other than a requisite before any requisite
             has been collected (see the checks on ``uris is None`` below).

     Returns:
         ``ReferentToken(bdr, t0, t1)`` covering the first token through the
         last accepted one, or None when no requisites were collected,
         neither "Р/С" nor "Л/С" is among them, or there are fewer than two
         requisites and no organization.
     """
     if (t is None): 
         return None
     t0 = t
     t1 = t
     # uris / uris_keys are parallel lists of collected requisites and their
     # schemes; both stay None until the first requisite is accepted.
     uris_keys = None
     uris = None
     org0_ = None
     cor_org = None
     org_is_bank = False
     # empty counts consecutive tokens that contributed nothing.
     empty = 0
     last_uri = None
     # Emulated for-loop: later passes (including after `continue`) advance
     # to t.next0_.
     first_pass2749 = True
     while True:
         if first_pass2749: first_pass2749 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char and t != t0): 
             break
         # Commas, prepositions and slashes are skipped silently.
         if (t.is_comma or t.morph.class0_.is_preposition or t.isCharOf("/\\")): 
             continue
         bank_keyword = False
         # Skip the two-word phrase "ПОЛНЫЙ НАИМЕНОВАНИЕ" / "ПОЛНЫЙ НАЗВАНИЕ".
         if (t.isValue("ПОЛНЫЙ", None) and t.next0_ is not None and ((t.next0_.isValue("НАИМЕНОВАНИЕ", None) or t.next0_.isValue("НАЗВАНИЕ", None)))): 
             t = t.next0_.next0_
             if (t is None): 
                 break
         if (t.isValue("БАНК", None)): 
             # The word "БАНК": if it is itself an ORGANIZATION referent token
             # it already counts as the bank keyword; otherwise move past an
             # optional noun phrase and colon to the token that follows.
             if ((isinstance(t, ReferentToken)) and t.getReferent().type_name == "ORGANIZATION"): 
                 bank_keyword = True
             tt = t.next0_
             npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
             if (npt is not None): 
                 tt = npt.end_token.next0_
             if (tt is not None and tt.isChar(':')): 
                 tt = tt.next0_
             if (tt is not None): 
                 if (not bank_keyword): 
                     t = tt
                     bank_keyword = True
                 elif (tt.getReferent() is not None and tt.getReferent().type_name == "ORGANIZATION"): 
                     t = tt
         r = t.getReferent()
         if (r is not None and r.type_name == "ORGANIZATION"): 
             # Decide whether the organization is a bank: check the "KIND"
             # value of the referent and of up to three of its parents.
             is_bank = False
             kk = 0
             rr = r
             while rr is not None and (kk < 4): 
                 is_bank = Utils.compareStrings(Utils.ifNotNull(rr.getStringValue("KIND"), ""), "Bank", True) == 0
                 if (is_bank): 
                     break
                 rr = rr.parent_referent; kk += 1
             if (not is_bank and bank_keyword): 
                 is_bank = True
             # A non-bank organization after an "ИНН" requisite aborts.
             if (not is_bank and uris is not None and "ИНН" in uris_keys): 
                 return None
             # An organization introduced by "В" right after a "К/С" requisite
             # is stored as the correspondent bank.
             if ((last_uri is not None and last_uri.scheme == "К/С" and t.previous is not None) and t.previous.isValue("В", None)): 
                 cor_org = r
                 t1 = t
             elif (org0_ is None or ((not org_is_bank and is_bank))): 
                 # First organization seen, or a bank replacing a non-bank.
                 org0_ = r
                 t1 = t
                 org_is_bank = is_bank
                 if (is_bank): 
                     continue
             if (uris is None and not key_word): 
                 return None
             continue
         if (isinstance(r, UriReferent)): 
             u = Utils.asObjectOrNull(r, UriReferent)
             if (uris is None): 
                 # The first requisite must be a bank requisite, and an "ИНН"
                 # that ends its line is not accepted as a start.
                 if (not BankAnalyzer.__isBankReq(u.scheme)): 
                     return None
                 if (u.scheme == "ИНН" and t.is_newline_after): 
                     return None
                 uris = list()
                 uris_keys = list()
             else: 
                 # Later requisites end the scan when they are not bank
                 # requisites, repeat a scheme, or are an "ИНН" that follows
                 # unrelated tokens.
                 if (not BankAnalyzer.__isBankReq(u.scheme)): 
                     break
                 if (u.scheme in uris_keys): 
                     break
                 if (u.scheme == "ИНН"): 
                     if (empty > 0): 
                         break
             uris_keys.append(u.scheme)
             uris.append(u)
             last_uri = u
             t1 = t
             empty = 0
             continue
         elif (uris is None and not key_word and not org_is_bank): 
             return None
         # GEO and ADDRESS referents are tolerated but counted as empty.
         if (r is not None and ((r.type_name == "GEO" or r.type_name == "ADDRESS"))): 
             empty += 1
             continue
         if (isinstance(t, TextToken)): 
             if (t.isValue("ПОЛНЫЙ", None) or t.isValue("НАИМЕНОВАНИЕ", None) or t.isValue("НАЗВАНИЕ", None)): 
                 pass
             elif (t.chars.is_letter): 
                 tok = BankAnalyzer.__m_ontology.tryParse(t, TerminParseAttr.NO)
                 if (tok is not None): 
                     # Known ontology term: jump to its end, reset the counter.
                     t = tok.end_token
                     empty = 0
                 else: 
                     empty += 1
                     # A line starting with "<noun phrase>:" ends the scan.
                     if (t.is_newline_before): 
                         nnn = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
                         if (nnn is not None and nnn.end_token.next0_ is not None and nnn.end_token.next0_.isChar(':')): 
                             break
                 if (uris is None): 
                     break
         if (empty > 2): 
             break
         if (empty > 0 and t.isChar(':') and t.is_newline_after): 
             break
         # A number at the start of a line that is not followed by letters
         # ends the scan.
         if (((isinstance(t, NumberToken)) and t.is_newline_before and t.next0_ is not None) and not t.next0_.chars.is_letter): 
             break
     if (uris is None): 
         return None
     # At least one of the "Р/С" / "Л/С" requisites is mandatory.
     if (not "Р/С" in uris_keys and not "Л/С" in uris_keys): 
         return None
     # NOTE(review): ok is assigned but not used below.
     ok = False
     if ((len(uris) < 2) and org0_ is None): 
         return None
     bdr = BankDataReferent()
     for u in uris: 
         bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0)
     if (org0_ is not None): 
         bdr.addSlot(BankDataReferent.ATTR_BANK, org0_, False, 0)
     if (cor_org is not None): 
         bdr.addSlot(BankDataReferent.ATTR_CORBANK, cor_org, False, 0)
     # If the token before the start is an ORGANIZATION, copy those of its
     # bank requisites whose schemes were not collected above.
     org0 = (None if t0.previous is None else t0.previous.getReferent())
     if (org0 is not None and org0.type_name == "ORGANIZATION"): 
         for s in org0.slots: 
             if (isinstance(s.value, UriReferent)): 
                 u = Utils.asObjectOrNull(s.value, UriReferent)
                 if (BankAnalyzer.__isBankReq(u.scheme)): 
                     if (not u.scheme in uris_keys): 
                         bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0)
     return ReferentToken(bdr, t0, t1)
Пример #11
0
 def can_be_equals(self, obj: 'Referent',
                   typ: 'ReferentsEqualType') -> bool:
     """Tell whether ``obj`` is a ``UriReferent`` with the same value.

     ``obj`` is converted with ``Utils.asObjectOrNull(obj, UriReferent)``;
     when that yields None the result is False. Otherwise the two ``value``
     attributes are compared with ``Utils.compareStrings(..., True)`` and
     the result is True exactly when that comparison returns 0.

     ``typ`` is accepted but not consulted by this implementation.
     """
     other_uri = Utils.asObjectOrNull(obj, UriReferent)
     return (other_uri is not None
             and Utils.compareStrings(self.value, other_uri.value, True) == 0)
Пример #12
0
 def __TryParse(t: 'Token',
                prev: 'TransItemToken',
                after_conj: bool,
                attach_high: bool = False) -> 'TransItemToken':
     if (t is None):
         return None
     t1 = t
     if (t1.isChar(',')):
         t1 = t1.next0_
     if (t1 is not None and t1.isValue("ПРИНАДЛЕЖАТЬ", "НАЛЕЖАТИ")):
         t1 = t1.next0_
     if (isinstance(t1, ReferentToken)):
         if (t1.getReferent().type_name == "ORGANIZATION"):
             return TransItemToken._new2521(t, t1, TransItemToken.Typs.ORG,
                                            t1.getReferent(), t1.morph)
     route = False
     if (t1 is not None and ((t1.isValue("СЛЕДОВАТЬ", "СЛІДУВАТИ")
                              or t1.isValue("ВЫПОЛНЯТЬ", "ВИКОНУВАТИ")))):
         t1 = t1.next0_
         route = True
     if (t1 is not None and t1.morph.class0_.is_preposition):
         t1 = t1.next0_
     if (t1 is not None and
         ((t1.isValue("РЕЙС", None) or t1.isValue("МАРШРУТ", None)))):
         t1 = t1.next0_
         route = True
     if (isinstance(t1, ReferentToken)):
         if (isinstance(t1.getReferent(), GeoReferent)):
             geo_ = Utils.asObjectOrNull(t1.getReferent(), GeoReferent)
             if (geo_.is_state or geo_.is_city):
                 tit = TransItemToken._new2522(t, t1,
                                               TransItemToken.Typs.ROUTE,
                                               list())
                 tit.route_items.append(geo_)
                 t1 = t1.next0_
                 first_pass3132 = True
                 while True:
                     if first_pass3132: first_pass3132 = False
                     else: t1 = t1.next0_
                     if (not (t1 is not None)): break
                     if (t1.is_hiphen):
                         continue
                     if (t1.morph.class0_.is_preposition
                             or t1.morph.class0_.is_conjunction):
                         continue
                     geo_ = (Utils.asObjectOrNull(t1.getReferent(),
                                                  GeoReferent))
                     if (geo_ is None):
                         break
                     if (not geo_.is_city and not geo_.is_state):
                         break
                     tit.route_items.append(geo_)
                     tit.end_token = t1
                 if (len(tit.route_items) > 1 or route):
                     return tit
         elif ((isinstance(t1.getReferent(), DateReferent))
               and (t1.whitespaces_before_count < 3)):
             tit = TransItemToken._new2523(t, t1, TransItemToken.Typs.DATE,
                                           t1.getReferent())
             if (t1.next0_ is not None):
                 if (t1.next0_.isValue("В", None)
                         and t1.next0_.next0_ is not None
                         and t1.next0_.next0_.isChar('.')):
                     tit.end_token = t1.next0_.next0_
                 elif (t1.next0_.isValue("ВЫП", None)
                       or t1.next0_.isValue("ВЫПУСК", None)):
                     tit.end_token = t1.next0_
                     if (t1.next0_.next0_ is not None
                             and t1.next0_.next0_.isChar('.')):
                         tit.end_token = t1.next0_.next0_
             return tit
     if (isinstance(t, TextToken)):
         num = MiscHelper.checkNumberPrefix(t)
         if (num is not None):
             tit = TransItemToken.__attachRusAutoNumber(num)
             if (tit is None):
                 tit = TransItemToken._attachNumber(num, False)
             if (tit is not None):
                 tit.begin_token = t
                 return tit
         tok = TransItemToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
         if (tok is None and ((t.isValue("С", None) or t.isValue("C", None)
                               or t.isValue("ЗА", None)))):
             tok = TransItemToken.M_ONTOLOGY.tryParse(
                 t.next0_, TerminParseAttr.NO)
         if (tok is None and BracketHelper.isBracket(t, True)):
             tok1 = TransItemToken.M_ONTOLOGY.tryParse(
                 t.next0_, TerminParseAttr.NO)
             if (tok1 is not None and BracketHelper.isBracket(
                     tok1.end_token.next0_, True)):
                 tok = tok1
                 tok.begin_token = t
                 tok.end_token = tok.end_token.next0_
                 tok.begin_token = t
             elif (tok1 is not None):
                 tt = Utils.asObjectOrNull(tok1.termin,
                                           TransItemToken.TransTermin)
                 if (tt.typ == TransItemToken.Typs.BRAND):
                     tok = tok1
                     tok.begin_token = t
         if (tok is None and t.isValue("МАРКА", None)):
             res1 = TransItemToken.__TryParse(t.next0_, prev, after_conj,
                                              False)
             if (res1 is not None):
                 if (res1.typ == TransItemToken.Typs.NAME
                         or res1.typ == TransItemToken.Typs.BRAND):
                     res1.begin_token = t
                     res1.typ = TransItemToken.Typs.BRAND
                     return res1
         if (tok is not None):
             tt = Utils.asObjectOrNull(tok.termin,
                                       TransItemToken.TransTermin)
             if (tt.typ == TransItemToken.Typs.NUMBER):
                 tit = TransItemToken.__attachRusAutoNumber(
                     tok.end_token.next0_)
                 if (tit is None):
                     tit = TransItemToken._attachNumber(
                         tok.end_token.next0_, False)
                 if (tit is not None):
                     tit.begin_token = t
                     return tit
                 else:
                     return None
             if (tt.is_doubt and not attach_high):
                 if (prev is None or prev.typ != TransItemToken.Typs.NOUN):
                     if ((prev is not None
                          and prev.typ == TransItemToken.Typs.BRAND
                          and tt.typ == TransItemToken.Typs.BRAND)
                             and Utils.compareStrings(
                                 tt.canonic_text, prev.value, True) == 0):
                         pass
                     else:
                         return None
             if (tt.canonic_text == "СУДНО"):
                 if ((((tok.morph.number) & (MorphNumber.PLURAL))) !=
                     (MorphNumber.UNDEFINED)):
                     if (not BracketHelper.canBeStartOfSequence(
                             tok.end_token.next0_, False, False)):
                         return None
             tit = TransItemToken._new2524(tok.begin_token, tok.end_token,
                                           tt.kind, tt.typ, tt.is_doubt,
                                           tok.chars, tok.morph)
             tit.value = tt.canonic_text
             if (tit.typ == TransItemToken.Typs.NOUN):
                 tit.value = tit.value.lower()
             else:
                 tit.value = tit.value.upper()
             return tit
         if (tok is None and t.morph.class0_.is_adjective):
             npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
             if (npt is not None and len(npt.adjectives) > 0):
                 state_ = None
                 tt = t
                 first_pass3133 = True
                 while True:
                     if first_pass3133: first_pass3133 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.previous != npt.end_token)):
                         break
                     tok = TransItemToken.M_ONTOLOGY.tryParse(
                         tt, TerminParseAttr.NO)
                     if (tok is None and state_ is None):
                         state_ = tt.kit.processReferent("GEO", tt)
                     if (tok is not None
                             and tok.end_token == npt.end_token):
                         if ((tok.termin).typ == TransItemToken.Typs.NOUN):
                             tit = TransItemToken._new2524(
                                 t, tok.end_token, (tok.termin).kind,
                                 TransItemToken.Typs.NOUN,
                                 (tok.termin).is_doubt, tok.chars,
                                 npt.morph)
                             tit.value = (tok.termin).canonic_text.lower()
                             tit.alt_value = npt.getNormalCaseText(
                                 None, False, MorphGender.UNDEFINED,
                                 False).lower()
                             if (LanguageHelper.endsWithEx(
                                     tit.alt_value, "суд", "суда", None,
                                     None)):
                                 if (not BracketHelper.canBeStartOfSequence(
                                         tok.end_token.next0_, False,
                                         False)):
                                     continue
                             if (state_ is not None):
                                 if ((state_.referent).is_state):
                                     tit.state = state_
                             return tit
     if (t is not None and t.isValue("КЛАСС", None)
             and t.next0_ is not None):
         br = BracketHelper.tryParse(t.next0_, BracketParseAttr.NO, 100)
         if (br is not None):
             return TransItemToken._new2526(
                 t, br.end_token, TransItemToken.Typs.CLASS,
                 MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO))
     nt = Utils.asObjectOrNull(t, NumberToken)
     if (nt is not None):
         if (prev is None or nt.typ != NumberSpellingType.DIGIT):
             return None
         if (prev.typ == TransItemToken.Typs.BRAND):
             return TransItemToken.__attachModel(t, False, prev)
         else:
             return None
     res = TransItemToken.__attachRusAutoNumber(t)
     if ((res) is not None):
         if (not res.is_doubt):
             return res
         if (prev is not None and prev.typ == TransItemToken.Typs.NOUN
                 and prev.kind == TransportKind.AUTO):
             return res
         if (prev is not None
                 and ((prev.typ == TransItemToken.Typs.BRAND
                       or prev.typ == TransItemToken.Typs.MODEL))):
             return res
     t1 = t
     if (t.is_hiphen):
         t1 = t.next0_
     if (prev is not None and prev.typ == TransItemToken.Typs.BRAND
             and t1 is not None):
         tit = TransItemToken.__attachModel(t1, True, prev)
         if (tit is not None):
             tit.begin_token = t
             return tit
     if (prev is not None
             and ((prev.typ == TransItemToken.Typs.NOUN or after_conj))):
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if (br is not None and br.is_quote_type):
             tit = TransItemToken.tryParse(br.begin_token.next0_, prev,
                                           after_conj, False)
             if (tit is not None and tit.end_token.next0_ == br.end_token):
                 if (not tit.is_doubt
                         or tit.typ == TransItemToken.Typs.BRAND):
                     tit.begin_token = br.begin_token
                     tit.end_token = br.end_token
                     return tit
             s = MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO)
             if (not Utils.isNullOrEmpty(s) and (len(s) < 30)):
                 chars_ = 0
                 digs = 0
                 un = 0
                 for c in s:
                     if (not Utils.isWhitespace(c)):
                         if (str.isalpha(c)):
                             chars_ += 1
                         elif (str.isdigit(c)):
                             digs += 1
                         else:
                             un += 1
                 if (((digs == 0 and un == 0
                       and t.next0_.chars.is_capital_upper))
                         or prev.kind == TransportKind.SHIP
                         or prev.kind == TransportKind.SPACE):
                     return TransItemToken._new2526(
                         br.begin_token, br.end_token,
                         TransItemToken.Typs.NAME, s)
                 if (digs > 0 and (chars_ < 5)):
                     return TransItemToken._new2526(
                         br.begin_token, br.end_token,
                         TransItemToken.Typs.MODEL, s.replace(" ", ""))
     if (prev is not None and (((prev.typ == TransItemToken.Typs.NOUN
                                 or prev.typ == TransItemToken.Typs.BRAND
                                 or prev.typ == TransItemToken.Typs.NAME)
                                or prev.typ == TransItemToken.Typs.MODEL))):
         tit = TransItemToken.__attachModel(
             t, prev.typ != TransItemToken.Typs.NAME, prev)
         if (tit is not None):
             return tit
     if (((prev is not None and prev.typ == TransItemToken.Typs.NOUN
           and prev.kind == TransportKind.AUTO) and
          (isinstance(t, TextToken)) and t.chars.is_letter)
             and not t.chars.is_all_lower
             and (t.whitespaces_before_count < 2)):
         pt = t.kit.processReferent("PERSON", t)
         if (pt is None):
             tit = TransItemToken._new2529(t, t, TransItemToken.Typs.BRAND)
             tit.value = (t).term
             return tit
     if (((prev is not None and prev.typ == TransItemToken.Typs.NOUN and
           ((prev.kind == TransportKind.SHIP
             or prev.kind == TransportKind.SPACE)))) or after_conj):
         if (t.chars.is_capital_upper):
             ok = True
             npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
             if (npt is not None and len(npt.adjectives) > 0):
                 ok = False
             else:
                 rt = t.kit.processReferent("PERSON", t)
                 if (rt is not None):
                     ok = False
             if (t.getMorphClassInDictionary().is_proper_surname):
                 if (not t.morph.case_.is_nominative):
                     ok = False
             if (ok):
                 t1 = t
                 tt = t.next0_
                 while tt is not None:
                     if (tt.whitespaces_before_count > 1):
                         break
                     if (tt.chars != t.chars):
                         break
                     tit = TransItemToken.tryParse(tt, None, False, False)
                     if ((tit) is not None):
                         break
                     t1 = tt
                     tt = tt.next0_
                 s = MiscHelper.getTextValue(t, t1, GetTextAttr.NO)
                 if (s is not None):
                     res1 = TransItemToken._new2530(
                         t, t1, TransItemToken.Typs.NAME, True, s)
                     if (not t1.is_newline_after):
                         br = BracketHelper.tryParse(
                             t1.next0_, BracketParseAttr.NO, 100)
                         if (br is not None):
                             res1.end_token = br.end_token
                             res1.alt_value = res1.value
                             res1.value = MiscHelper.getTextValueOfMetaToken(
                                 br, GetTextAttr.NO)
                     return res1
     return None