def __canBeGeoAfter(tt: 'Token') -> bool:
    """Tell whether a geographical object can start at (or just after) ``tt``.

    Leading commas and brackets are skipped first. The remaining token is
    accepted when any of these holds:

    * it already wraps a ``GeoReferent``;
    * ``TerrItemToken.tryParseList`` yields at least two items of which
      exactly one of the first two carries a ``termin_item``;
    * ``CityAttachHelper.checkCityAfter`` succeeds;
    * ``TerrAttachHelper.tryAttachStateUSATerritory`` returns something.

    Args:
        tt(Token): token to start looking from (may be None)

    Returns:
        bool: True if a geo object may follow, otherwise False
    """
    cur = tt
    # Skip separators that may stand between the previous entity and the geo.
    while cur is not None:
        if not (cur.is_comma or BracketHelper.isBracket(cur, True)):
            break
        cur = cur.next0_
    if cur is None:
        return False

    if isinstance(cur.getReferent(), GeoReferent):
        return True

    items = TerrItemToken.tryParseList(cur, None, 2)
    if items is not None and len(items) > 1:
        first_has_termin = items[0].termin_item is not None
        second_has_termin = items[1].termin_item is not None
        # Exactly one of the two leading items has a termin_item.
        if first_has_termin != second_has_termin:
            return True

    if CityAttachHelper.checkCityAfter(cur):
        return True
    return TerrAttachHelper.tryAttachStateUSATerritory(cur) is not None
def createNickname(pr : 'PersonReferent', t : 'Token') -> 'Token':
    """Extract a nickname (alias / pseudonym / call sign) for a person.

    Scans forward from ``t`` over punctuation, prepositions, an opening round
    bracket and the nickname keywords. A keyword must be seen, otherwise
    nothing is extracted. The nickname itself is then read either from one or
    more bracketed sequences or from a one- or two-item person-item list.

    Args:
        pr(PersonReferent): person that receives ATTR_NICKNAME slots
        t(Token): start token

    Returns:
        Token: last token of the nickname (the nickname text is written into
        ``pr``), or None when no nickname was found
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; the "return None when the text value is None" paths should be
    # confirmed against the upstream file.
    keyword_seen = False
    in_round_bracket = False
    while t is not None:
        if t.is_hiphen or t.is_comma or t.isCharOf(".:;"):
            pass
        elif t.morph.class0_.is_preposition:
            pass
        elif t.isChar('('):
            in_round_bracket = True
        elif (t.isValue("ПРОЗВИЩЕ", "ПРІЗВИСЬКО")
              or t.isValue("КЛИЧКА", None)
              or t.isValue("ПСЕВДОНИМ", "ПСЕВДОНІМ")
              or t.isValue("ПСЕВДО", None)
              or t.isValue("ПОЗЫВНОЙ", "ПОЗИВНИЙ")):
            keyword_seen = True
        else:
            # First token that is neither filler nor keyword: nickname start.
            break
        t = t.next0_

    if t is None or not keyword_seen:
        return None

    if BracketHelper.isBracket(t, True):
        seq = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
        if seq is None:
            return None
        nick = MiscHelper.getTextValue(seq.begin_token.next0_,
                                       seq.end_token.previous, GetTextAttr.NO)
        if nick is None:
            return None
        pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
        t = seq.end_token

        # Further bracketed nicknames separated by commas / "and".
        cur = t.next0_
        while cur is not None:
            if not cur.is_comma_and:
                if not BracketHelper.isBracket(cur, True):
                    break
                extra = BracketHelper.tryParse(cur, BracketParseAttr.NO, 100)
                if extra is None:
                    break
                nick = MiscHelper.getTextValue(extra.begin_token.next0_,
                                               extra.end_token.previous,
                                               GetTextAttr.NO)
                if nick is not None:
                    pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                cur = extra.end_token
                t = cur
            cur = cur.next0_

        # Swallow the ')' matching the '(' seen before the keyword.
        if in_round_bracket and t.next0_ is not None and t.next0_.isChar(')'):
            t = t.next0_
        return t

    items = PersonItemToken.tryAttachList(t, None,
                                          PersonItemToken.ParseAttr.NO, 10)
    if items is None or len(items) not in (1, 2):
        return None
    last_item = items[len(items) - 1]
    nick = MiscHelper.getTextValue(items[0].begin_token, last_item.end_token,
                                   GetTextAttr.NO)
    if nick is None:
        return None
    pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
    t = last_item.end_token
    if in_round_bracket and t.next0_ is not None and t.next0_.isChar(')'):
        t = t.next0_
    return t
def process(self, kit: 'AnalysisKit') -> None:
    """Find weapon entities in the token chain of ``kit`` and embed them.

    Works in two passes over ``kit.first_token``:

    1. Parse weapon item lists, attach referents, register them in the
       analyzer data and embed the resulting tokens. The model and name
       slots of every found referent are collected into lookup tables.
    2. Walk the chain again and embed further mentions that match a
       collected name or model.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source - confirm against the upstream file.

    Args:
        kit(AnalysisKit): analysis container whose tokens are processed
    """
    ad = kit.getAnalyzerData(self)
    # Lookup tables filled in pass 1 and used in pass 2.
    models = TerminCollection()
    objs_by_model = dict()
    obj_by_names = TerminCollection()
    t = kit.first_token
    first_pass3158 = True
    while True:
        if first_pass3158:
            first_pass3158 = False
        else:
            t = t.next0_
        if (not (t is not None)):
            break
        its = WeaponItemToken.tryParseList(t, 10)
        if (its is None):
            continue
        rts = self.__tryAttach(its, False)
        if (rts is not None):
            for rt in rts:
                rt.referent = ad.registerReferent(rt.referent)
                kit.embedToken(rt)
                # Continue the scan from the embedded token.
                t = (rt)
                for s in rt.referent.slots:
                    if (s.type_name == WeaponReferent.ATTR_MODEL):
                        mod = str(s.value)
                        # k == 0: the bare model; k == 1: "<brand> <model>".
                        for k in range(2):
                            # Keys starting with a digit are not registered.
                            if (not str.isdigit(mod[0])):
                                li = []
                                wrapli2638 = RefOutArgWrapper(None)
                                inoutres2639 = Utils.tryGetValue(
                                    objs_by_model, mod, wrapli2638)
                                li = wrapli2638.value
                                if (not inoutres2639):
                                    li = list()
                                    objs_by_model[mod] = li
                                if (not rt.referent in li):
                                    li.append(rt.referent)
                                models.addStr(mod, li, None, False)
                            if (k > 0):
                                break
                            brand = rt.referent.getStringValue(
                                WeaponReferent.ATTR_BRAND)
                            if (brand is None):
                                break
                            mod = "{0} {1}".format(brand, mod)
                    elif (s.type_name == WeaponReferent.ATTR_NAME):
                        obj_by_names.add(
                            Termin._new117(str(s.value), rt.referent))
    # Nothing collected - the second pass has nothing to look for.
    if (len(objs_by_model) == 0 and len(obj_by_names.termins) == 0):
        return
    t = kit.first_token
    first_pass3159 = True
    while True:
        if first_pass3159:
            first_pass3159 = False
        else:
            t = t.next0_
        if (not (t is not None)):
            break
        br = BracketHelper.tryParse(t, BracketParseAttr.NO, 10)
        if (br is not None):
            # A known name that fills the whole bracket sequence.
            toks = obj_by_names.tryParse(t.next0_, TerminParseAttr.NO)
            if (toks is not None and toks.end_token.next0_ == br.end_token):
                rt0 = ReferentToken(
                    Utils.asObjectOrNull(toks.termin.tag, Referent),
                    br.begin_token, br.end_token)
                kit.embedToken(rt0)
                t = (rt0)
                continue
        if (not ((isinstance(t, TextToken)))):
            continue
        if (not t.chars.is_letter):
            continue
        tok = models.tryParse(t, TerminParseAttr.NO)
        if (tok is None):
            # Names are only tried for tokens that are not all-lowercase.
            if (not t.chars.is_all_lower):
                tok = obj_by_names.tryParse(t, TerminParseAttr.NO)
            if (tok is None):
                continue
        # A match glued to the following text is accepted only before one of
        # ",.)" or before a bracket.
        if (not tok.is_whitespace_after):
            if (tok.end_token.next0_ is None
                    or not tok.end_token.next0_.isCharOf(",.)")):
                if (not BracketHelper.isBracket(tok.end_token.next0_, False)):
                    continue
        tr = None
        # Model termins carry a list of referents (used only when it holds
        # exactly one); name termins carry the referent itself.
        li = Utils.asObjectOrNull(tok.termin.tag, list)
        if (li is not None and len(li) == 1):
            tr = li[0]
        else:
            tr = (Utils.asObjectOrNull(tok.termin.tag, Referent))
        if (tr is not None):
            # Pull a preceding brand token into the mention.
            tit = WeaponItemToken.tryParse(tok.begin_token.previous, None,
                                           False, True)
            if (tit is not None and tit.typ == WeaponItemToken.Typs.BRAND):
                tr.addSlot(WeaponReferent.ATTR_BRAND, tit.value, False, 0)
                tok.begin_token = tit.begin_token
            rt0 = ReferentToken(tr, tok.begin_token, tok.end_token)
            kit.embedToken(rt0)
            t = (rt0)
            continue
def tryParse(
        self,
        t0: 'Token',
        pars: 'TerminParseAttr' = TerminParseAttr.NO) -> 'TerminToken':
    """Try to attach (match) this termin starting at token ``t0``.

    The strategies are tried in this order: "smart" acronym, plain acronym,
    ordered match of ``self.terms``, order-independent match (when
    ``self.ignore_terms_order`` is set), and finally the abridged forms from
    ``self.abridges``.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source - confirm against the upstream file.

    Args:
        t0(Token): token at which matching starts
        pars(TerminParseAttr): parsing flags, tested as a bit mask with ``&``

    Returns:
        TerminToken: the matched span, or None when nothing matched
    """
    from pullenti.ner.core.MiscHelper import MiscHelper
    from pullenti.ner.core.BracketHelper import BracketHelper
    if (t0 is None):
        return None
    term = None
    if (isinstance(t0, TextToken)):
        term = (t0).term
    # --- "smart" acronym: whole word, or single letters each followed by '.'
    if (self.acronym_smart is not None
            and (((pars) & (TerminParseAttr.FULLWORDSONLY))) == (TerminParseAttr.NO)
            and term is not None):
        if (self.acronym_smart == term):
            if (t0.next0_ is not None and t0.next0_.isChar('.')
                    and not t0.is_whitespace_after):
                return TerminToken._new606(t0, t0.next0_, self)
            else:
                return TerminToken._new606(t0, t0, self)
        t1 = Utils.asObjectOrNull(t0, TextToken)
        tt = Utils.asObjectOrNull(t0, TextToken)
        i = 0
        # NOTE(review): this reads self.acronym although the guard checked
        # only self.acronym_smart - presumably both are set together; verify.
        while i < len(self.acronym):
            if (tt is None):
                break
            term1 = tt.term
            if (len(term1) != 1 or tt.is_whitespace_after):
                break
            if (i > 0 and tt.is_whitespace_before):
                break
            if (term1[0] != self.acronym[i]):
                break
            if (tt.next0_ is None or not tt.next0_.isChar('.')):
                break
            t1 = (Utils.asObjectOrNull(tt.next0_, TextToken))
            tt = (Utils.asObjectOrNull(tt.next0_.next0_, TextToken))
            i += 1
        if (i >= len(self.acronym)):
            return TerminToken._new606(t0, t1, self)
    # --- plain acronym equal to the token's term
    if (self.acronym is not None and term is not None and self.acronym == term):
        if (t0.chars.is_all_upper or self.acronym_can_be_lower
                or ((not t0.chars.is_all_lower and len(term) >= 3))):
            return TerminToken._new606(t0, t0, self)
    if (self.acronym is not None and t0.chars.is_last_lower
            and t0.length_char > 3):
        if (t0.isValue(self.acronym, None)):
            return TerminToken._new606(t0, t0, self)
    # cou = (number of non-hyphen terms) - (number of hyphen terms)
    cou = 0
    i = 0
    while i < len(self.terms):
        if (self.terms[i].is_hiphen):
            cou -= 1
        else:
            cou += 1
        i += 1
    # --- ordered match of the terms
    if (len(self.terms) > 0 and ((not self.ignore_terms_order or cou == 1))):
        t1 = t0
        tt = t0
        # While walking inside a ReferentToken: e0_ is its last inner token,
        # eup is the ReferentToken itself.
        e0_ = None
        eup = None
        ok = True
        mc = None
        dont_change_mc = False
        i = 0
        first_pass2812 = True
        while True:
            if first_pass2812:
                first_pass2812 = False
            else:
                i += 1
            if (not (i < len(self.terms))):
                break
            if (self.terms[i].is_hiphen):
                continue
            if (tt is not None and tt.is_hiphen and i > 0):
                tt = tt.next0_
            if (i > 0 and tt is not None):
                if ((((pars) & (TerminParseAttr.IGNOREBRACKETS))) != (TerminParseAttr.NO)
                        and not tt.chars.is_letter
                        and BracketHelper.isBracket(tt, False)):
                    tt = tt.next0_
            if (((((pars) & (TerminParseAttr.CANBEGEOOBJECT))) != (TerminParseAttr.NO)
                    and i > 0 and (isinstance(tt, ReferentToken)))
                    and tt.getReferent().type_name == "GEO"):
                tt = tt.next0_
            if ((isinstance(tt, ReferentToken)) and e0_ is None):
                # Descend into the referent token and match its inner tokens.
                eup = tt
                e0_ = (tt).end_token
                tt = (tt).begin_token
            if (tt is None):
                ok = False
                break
            if (not self.terms[i].checkByToken(tt)):
                if (tt.next0_ is not None and tt.isChar('.')
                        and self.terms[i].checkByToken(tt.next0_)):
                    tt = tt.next0_
                elif (((i > 0 and tt.next0_ is not None
                        and (isinstance(tt, TextToken)))
                       and ((tt.morph.class0_.is_preposition
                             or MiscHelper.isEngArticle(tt)))
                       and self.terms[i].checkByToken(tt.next0_))
                      and not self.terms[i - 1].is_pattern_any):
                    tt = tt.next0_
                else:
                    ok = False
                    if (((i + 2) < len(self.terms))
                            and self.terms[i + 1].is_hiphen
                            and self.terms[i + 2].checkByPrefToken(
                                self.terms[i],
                                Utils.asObjectOrNull(tt, TextToken))):
                        i += 2
                        ok = True
                    elif (((not tt.is_whitespace_after
                            and tt.next0_ is not None
                            and (isinstance(tt, TextToken)))
                           and (tt).length_char == 1
                           and tt.next0_.isCharOf("\"'`’“”"))
                          and not tt.next0_.is_whitespace_after
                          and (isinstance(tt.next0_.next0_, TextToken))):
                        if (self.terms[i].checkByStrPrefToken(
                                (tt).term,
                                Utils.asObjectOrNull(
                                    tt.next0_.next0_, TextToken))):
                            ok = True
                            tt = tt.next0_.next0_
                    if (not ok):
                        if (i > 0
                                and (((pars) & (TerminParseAttr.IGNORESTOPWORDS))) != (TerminParseAttr.NO)):
                            # Skip the token and retry the same term: the
                            # "i -= 1" cancels the "i += 1" at the loop top.
                            if (isinstance(tt, TextToken)):
                                if (not tt.chars.is_letter):
                                    tt = tt.next0_
                                    i -= 1
                                    continue
                                mc1 = tt.getMorphClassInDictionary()
                                if (mc1.is_conjunction or mc1.is_preposition):
                                    tt = tt.next0_
                                    i -= 1
                                    continue
                            if (isinstance(tt, NumberToken)):
                                tt = tt.next0_
                                i -= 1
                                continue
                        break
            if (tt.morph.items_count > 0 and not dont_change_mc):
                mc = MorphCollection(tt.morph)
                if (((mc.class0_.is_noun or mc.class0_.is_verb))
                        and not mc.class0_.is_adjective):
                    if (((i + 1) < len(self.terms))
                            and self.terms[i + 1].is_hiphen):
                        pass
                    else:
                        dont_change_mc = True
            if (tt.morph.class0_.is_preposition
                    or tt.morph.class0_.is_conjunction):
                dont_change_mc = True
            if (tt == e0_):
                # Reached the end of the referent token - climb back up.
                tt = eup
                eup = (None)
                e0_ = (None)
            if (e0_ is None):
                t1 = tt
            tt = tt.next0_
        if (ok and i >= len(self.terms)):
            # Include a trailing '.' when one of the abridges matches at t0.
            if (t1.next0_ is not None and t1.next0_.isChar('.')
                    and self.abridges is not None):
                for a in self.abridges:
                    if (a.tryAttach(t0) is not None):
                        t1 = t1.next0_
                        break
            if (t0 != t1 and t0.morph.class0_.is_adjective):
                npt = NounPhraseHelper.tryParse(t0, NounPhraseParseAttr.NO, 0)
                if (npt is not None and npt.end_char <= t1.end_char):
                    mc = npt.morph
            return TerminToken._new611(t0, t1, mc)
    # --- order-independent match: every term must be consumed
    if (len(self.terms) > 1 and self.ignore_terms_order):
        terms_ = list(self.terms)
        t1 = t0
        tt = t0
        while len(terms_) > 0:
            if (tt != t0 and tt is not None and tt.is_hiphen):
                tt = tt.next0_
            if (tt is None):
                break
            j = 0
            while j < len(terms_):
                if (terms_[j].checkByToken(tt)):
                    break
                j += 1
            if (j >= len(terms_)):
                if (tt != t0
                        and (((pars) & (TerminParseAttr.IGNORESTOPWORDS))) != (TerminParseAttr.NO)):
                    if (isinstance(tt, TextToken)):
                        if (not tt.chars.is_letter):
                            tt = tt.next0_
                            continue
                        mc1 = tt.getMorphClassInDictionary()
                        if (mc1.is_conjunction or mc1.is_preposition):
                            tt = tt.next0_
                            continue
                    if (isinstance(tt, NumberToken)):
                        tt = tt.next0_
                        continue
                break
            del terms_[j]
            t1 = tt
            tt = tt.next0_
        # Hyphen terms left over do not count as unmatched.
        for i in range(len(terms_) - 1, -1, -1):
            if (terms_[i].is_hiphen):
                del terms_[i]
        if (len(terms_) == 0):
            return TerminToken(t0, t1)
    # --- abridged forms: keep the longest one
    if (self.abridges is not None
            and (((pars) & (TerminParseAttr.FULLWORDSONLY))) == (TerminParseAttr.NO)):
        res = None
        for a in self.abridges:
            r = a.tryAttach(t0)
            if (r is None):
                continue
            if (r.abridge_without_point and len(self.terms) > 0):
                if (not ((isinstance(t0, TextToken)))):
                    continue
                if (a.parts[0].value != (t0).term):
                    continue
            if (res is None or (res.length_char < r.length_char)):
                res = r
        if (res is not None):
            return res
    return None
def getNameEx(begin: 'Token',
              end: 'Token',
              cla: 'MorphClass',
              mc: 'MorphCase',
              gender: 'MorphGender' = MorphGender.UNDEFINED,
              ignore_brackets_and_hiphens: bool = False,
              ignore_geo_referent: bool = False) -> str:
    """Build a name string from the tokens ``begin`` .. ``end``.

    Text tokens contribute a normal form selected by ``cla`` / ``mc`` /
    ``gender`` (falling back to the token's term), number tokens contribute
    their value, and meta tokens are expanded recursively.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source - confirm against the upstream file.

    Args:
        begin(Token): first token of the span
        end(Token): last token of the span
        cla(MorphClass): required morph class (``value`` 0 means "any")
        mc(MorphCase): required case (undefined means "any")
        gender(MorphGender): required gender (UNDEFINED means "any")
        ignore_brackets_and_hiphens(bool): bracketed parts are expanded
            recursively and hyphenated prefixes are joined with a space
            instead of '-'
        ignore_geo_referent(bool): skip meta tokens (other than ``begin``)
            whose referent type name is "GEO"

    Returns:
        str: the resulting text, or None when the span is invalid or nothing
        was produced
    """
    if (end is None or begin is None):
        return None
    if (begin.end_char > end.begin_char and begin != end):
        return None
    res = io.StringIO()
    # Accumulates the left parts of hyphenated words ("a-b-c").
    prefix = None
    t = begin
    first_pass2809 = True
    while True:
        if first_pass2809:
            first_pass2809 = False
        else:
            t = t.next0_
        if (not (t is not None and t.end_char <= end.end_char)):
            break
        # Stop once the accumulated text exceeds 1000 characters.
        if (res.tell() > 1000):
            break
        if (t.is_table_control_char):
            continue
        if (ignore_brackets_and_hiphens):
            if (BracketHelper.isBracket(t, False)):
                if (t == end):
                    break
                if (t.isCharOf("(<[")):
                    br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
                    if (br is not None and br.end_char <= end.end_char):
                        tmp = ProperNameHelper.getNameEx(
                            br.begin_token.next0_, br.end_token.previous,
                            MorphClass.UNDEFINED, MorphCase.UNDEFINED,
                            MorphGender.UNDEFINED,
                            ignore_brackets_and_hiphens, False)
                        if (tmp is not None):
                            # A trailing bracket holding one non-letter,
                            # non-referent token is dropped.
                            if ((br.end_char == end.end_char
                                 and br.begin_token.next0_ == br.end_token.previous
                                 and not br.begin_token.next0_.chars.is_letter)
                                    and not ((isinstance(
                                        br.begin_token.next0_, ReferentToken)))):
                                pass
                            else:
                                print(" {0}{1}{2}".format(
                                    t.getSourceText(), tmp,
                                    br.end_token.getSourceText()),
                                    end="", file=res, flush=True)
                        t = br.end_token
                continue
            if (t.is_hiphen):
                if (t == end):
                    break
                elif (t.is_whitespace_before or t.is_whitespace_after):
                    continue
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is not None):
            if (not ignore_brackets_and_hiphens):
                # "word-" followed by a text token: remember the word as a
                # prefix and move on to the hyphen.
                if ((tt.next0_ is not None and tt.next0_.is_hiphen
                     and (isinstance(tt.next0_.next0_, TextToken)))
                        and tt != end and tt.next0_ != end):
                    if (prefix is None):
                        prefix = tt.term
                    else:
                        prefix = "{0}-{1}".format(prefix, tt.term)
                    t = tt.next0_
                    if (t == end):
                        break
                    else:
                        continue
            s = None
            if (cla.value != (0) or not mc.is_undefined
                    or gender != MorphGender.UNDEFINED):
                # First try: word forms satisfying class, case and gender.
                for wff in tt.morph.items:
                    wf = Utils.asObjectOrNull(wff, MorphWordForm)
                    if (wf is None):
                        continue
                    if (cla.value != (0)):
                        if ((((wf.class0_.value) & (cla.value))) == 0):
                            continue
                    if (not mc.is_undefined):
                        if (((wf.case_) & mc).is_undefined):
                            continue
                    if (gender != MorphGender.UNDEFINED):
                        if ((((wf.gender) & (gender))) == (MorphGender.UNDEFINED)):
                            continue
                    if (s is None or wf.normal_case == tt.term):
                        s = wf.normal_case
                # Second try: the same without the gender restriction.
                if (s is None and gender != MorphGender.UNDEFINED):
                    for wff in tt.morph.items:
                        wf = Utils.asObjectOrNull(wff, MorphWordForm)
                        if (wf is None):
                            continue
                        if (cla.value != (0)):
                            if ((((wf.class0_.value) & (cla.value))) == 0):
                                continue
                        if (not mc.is_undefined):
                            if (((wf.case_) & mc).is_undefined):
                                continue
                        if (s is None or wf.normal_case == tt.term):
                            s = wf.normal_case
            if (s is None):
                s = tt.term
                if (tt.chars.is_last_lower and tt.length_char > 2):
                    # Keep the source text up to its last uppercase letter.
                    s = tt.getSourceText()
                    for i in range(len(s) - 1, -1, -1):
                        if (str.isupper(s[i])):
                            s = s[0:0 + i + 1]
                            break
            if (prefix is not None):
                delim = "-"
                if (ignore_brackets_and_hiphens):
                    delim = " "
                s = "{0}{1}{2}".format(prefix, delim, s)
            prefix = (None)
            if (res.tell() > 0 and len(s) > 0):
                if (str.isalnum(s[0])):
                    # No space right after a hyphen already written.
                    ch0 = Utils.getCharAtStringIO(res, res.tell() - 1)
                    if (ch0 == '-'):
                        pass
                    else:
                        print(' ', end="", file=res)
                elif (not ignore_brackets_and_hiphens
                      and BracketHelper.canBeStartOfSequence(
                          tt, False, False)):
                    print(' ', end="", file=res)
            print(s, end="", file=res)
        elif (isinstance(t, NumberToken)):
            if (res.tell() > 0):
                if (not t.is_whitespace_before
                        and Utils.getCharAtStringIO(
                            res, res.tell() - 1) == '-'):
                    pass
                else:
                    print(' ', end="", file=res)
            nt = Utils.asObjectOrNull(t, NumberToken)
            # A one-word adjective number spelled in words keeps its term;
            # anything else is written as the numeric value.
            if ((t.morph.class0_.is_adjective
                 and nt.typ == NumberSpellingType.WORDS
                 and nt.begin_token == nt.end_token)
                    and (isinstance(nt.begin_token, TextToken))):
                print((nt.begin_token).term, end="", file=res)
            else:
                print(nt.value, end="", file=res)
        elif (isinstance(t, MetaToken)):
            if ((ignore_geo_referent and t != begin
                 and t.getReferent() is not None)
                    and t.getReferent().type_name == "GEO"):
                continue
            s = ProperNameHelper.getNameEx(
                (t).begin_token, (t).end_token, cla, mc, gender,
                ignore_brackets_and_hiphens, ignore_geo_referent)
            if (not Utils.isNullOrEmpty(s)):
                if (res.tell() > 0):
                    if (not t.is_whitespace_before
                            and Utils.getCharAtStringIO(
                                res, res.tell() - 1) == '-'):
                        pass
                    else:
                        print(' ', end="", file=res)
                print(s, end="", file=res)
        if (t == end):
            break
    if (res.tell() == 0):
        return None
    return Utils.toStringStringIO(res)
def __TryParse(t: 'Token',
               prev: 'WeaponItemToken',
               after_conj: bool,
               attach_high: bool = False) -> 'WeaponItemToken':
    """Try to parse a single weapon item (noun, brand, model, name, number).

    The attempts are made in this order: bracketed item (recursive),
    ontology lookup, number prefix, short all-uppercase model code, and
    finally a capitalised word treated as a name / brand / model depending on
    ``prev`` and on what follows.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source - confirm against the upstream file.

    Args:
        t(Token): token to parse at
        prev(WeaponItemToken): previously parsed item, or None
        after_conj(bool): only passed through to the recursive bracket call
        attach_high(bool): only passed through to the recursive bracket call

    Returns:
        WeaponItemToken: the parsed item, or None
    """
    if (t is None):
        return None
    if (BracketHelper.isBracket(t, True)):
        # Item wrapped in brackets: parse the inside, then widen the span.
        wit = WeaponItemToken.__TryParse(t.next0_, prev, after_conj,
                                         attach_high)
        if (wit is not None):
            if (wit.end_token.next0_ is None):
                wit.begin_token = t
                return wit
            if (BracketHelper.isBracket(wit.end_token.next0_, True)):
                wit.begin_token = t
                wit.end_token = wit.end_token.next0_
                return wit
    tok = WeaponItemToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
    if (tok is not None):
        res = WeaponItemToken(t, tok.end_token)
        res.typ = (Utils.valToEnum(tok.termin.tag, WeaponItemToken.Typs))
        if (res.typ == WeaponItemToken.Typs.NOUN):
            res.value = tok.termin.canonic_text
            if (tok.termin.tag2 is not None):
                res.is_doubt = True
            # Extend the noun with following brands and adjectives.
            tt = res.end_token.next0_
            first_pass3156 = True
            while True:
                if first_pass3156:
                    first_pass3156 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if (tt.whitespaces_before_count > 2):
                    break
                wit = WeaponItemToken.__TryParse(tt, None, False, False)
                if (wit is not None):
                    if (wit.typ == WeaponItemToken.Typs.BRAND):
                        res.__inner_tokens.append(wit)
                        tt = wit.end_token
                        res.end_token = tt
                        continue
                    break
                if (not ((isinstance(tt, TextToken)))):
                    break
                mc = tt.getMorphClassInDictionary()
                if (mc == MorphClass.ADJECTIVE):
                    # alt_value becomes "<adjectives...> <value>".
                    if (res.alt_value is None):
                        res.alt_value = res.value
                    if (res.alt_value.endswith(res.value)):
                        res.alt_value = res.alt_value[0:0 + len(res.alt_value) - len(res.value)]
                    res.alt_value = "{0}{1} {2}".format(
                        res.alt_value, (tt).term, res.value)
                    res.end_token = tt
                    continue
                break
            return res
        if (res.typ == WeaponItemToken.Typs.BRAND
                or res.typ == WeaponItemToken.Typs.NAME):
            res.value = tok.termin.canonic_text
            return res
        if (res.typ == WeaponItemToken.Typs.MODEL):
            res.value = tok.termin.canonic_text
            if (isinstance(tok.termin.tag2, list)):
                # tag2 lists further termins attached to this model; each
                # becomes an inner token over the same span.
                li = Utils.asObjectOrNull(tok.termin.tag2, list)
                for to in li:
                    wit = WeaponItemToken._new2600(
                        t, tok.end_token,
                        Utils.valToEnum(to.tag, WeaponItemToken.Typs),
                        to.canonic_text, tok.begin_token == tok.end_token)
                    res.__inner_tokens.append(wit)
                    if (to.additional_vars is not None
                            and len(to.additional_vars) > 0):
                        wit.alt_value = to.additional_vars[0].canonic_text
            res.__correctModel()
            return res
    nnn = MiscHelper.checkNumberPrefix(t)
    if (nnn is not None):
        tit = TransItemToken._attachNumber(nnn, True)
        if (tit is not None):
            res = WeaponItemToken._new2601(t, tit.end_token,
                                           WeaponItemToken.Typs.NUMBER)
            res.value = tit.value
            res.alt_value = tit.alt_value
            return res
    # Short (< 4 chars) all-uppercase letter token: possible model code.
    if (((isinstance(t, TextToken)) and t.chars.is_letter
         and t.chars.is_all_upper) and (t.length_char < 4)):
        if ((t.next0_ is not None
             and ((t.next0_.is_hiphen or t.next0_.isChar('.')))
             and (t.next0_.whitespaces_after_count < 2))
                and (isinstance(t.next0_.next0_, NumberToken))):
            res = WeaponItemToken._new2602(t, t.next0_,
                                           WeaponItemToken.Typs.MODEL, True)
            res.value = (t).term
            res.__correctModel()
            return res
        if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after):
            res = WeaponItemToken._new2602(t, t, WeaponItemToken.Typs.MODEL,
                                           True)
            res.value = (t).term
            res.__correctModel()
            return res
        # "СП" directly before a model or brand is returned as a NOUN item.
        if ((t).term == "СП" and (t.whitespaces_after_count < 3)
                and (isinstance(t.next0_, TextToken))):
            pp = WeaponItemToken.__TryParse(t.next0_, None, False, False)
            if (pp is not None
                    and ((pp.typ == WeaponItemToken.Typs.MODEL
                          or pp.typ == WeaponItemToken.Typs.BRAND))):
                res = WeaponItemToken._new2601(t, t,
                                               WeaponItemToken.Typs.NOUN)
                res.value = "ПИСТОЛЕТ"
                res.alt_value = "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
                return res
    # Non-lowercase word longer than 2 chars: a name, but only in context.
    if (((isinstance(t, TextToken)) and t.chars.is_letter
         and not t.chars.is_all_lower) and t.length_char > 2):
        ok = False
        if (prev is not None
                and ((prev.typ == WeaponItemToken.Typs.NOUN
                      or prev.typ == WeaponItemToken.Typs.MODEL
                      or prev.typ == WeaponItemToken.Typs.BRAND))):
            ok = True
        elif (prev is None and t.previous is not None
              and t.previous.is_comma_and):
            ok = True
        if (ok):
            res = WeaponItemToken._new2602(t, t, WeaponItemToken.Typs.NAME,
                                           True)
            res.value = (t).term
            # Hyphenated second part written in the same character case.
            if ((t.next0_ is not None and t.next0_.is_hiphen
                 and (isinstance(t.next0_.next0_, TextToken)))
                    and t.next0_.next0_.chars == t.chars):
                res.value = "{0}-{1}".format(res.value,
                                             (t.next0_.next0_).term)
                res.end_token = t.next0_.next0_
            if (prev is not None and prev.typ == WeaponItemToken.Typs.NOUN):
                res.typ = WeaponItemToken.Typs.BRAND
            # A number right after (via hyphen or glued) makes it a model.
            if (res.end_token.next0_ is not None
                    and res.end_token.next0_.is_hiphen
                    and (isinstance(res.end_token.next0_.next0_, NumberToken))):
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correctModel()
            elif (not res.end_token.is_whitespace_after
                  and (isinstance(res.end_token.next0_, NumberToken))):
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correctModel()
            return res
    return None
def tryAttach(t: 'Token',
              p1: 'InstrumentParticipant' = None,
              p2: 'InstrumentParticipant' = None,
              is_contract: bool = False) -> 'ParticipantToken':
    """Try to recognise a contract participant mention starting at ``t``.

    Two modes:

    * Discovery (``p1`` and ``p2`` are None and ``is_contract`` is True).
      ``t`` is examined as an organization / person referent that is later
      given a role name (the ИМЕНОВАТЬ / ДАЛЕЕ wording or a bracketed role),
      as a list of same-typed referents jointly called "СТОРОНА" (plural),
      or as an ontology role word directly followed by a person or
      organization.
    * Generic scan (always reached when discovery returns nothing). Walks
      forward looking for a role name, matching against the ontology and
      against the already known types ``p1.typ`` / ``p2.typ``.

    Fix relative to the original: the role type used to be computed with
    ``Utils.ifNotNull(typ22, npt.getNormalCaseText(...))``. Python evaluates
    call arguments eagerly, so this raised ``AttributeError`` whenever
    ``npt`` was None, which is exactly the case in which ``typ22`` is filled.
    The fallback is now evaluated lazily and ``npt`` is checked for None.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source - confirm against the upstream file.

    Args:
        t(Token): start token (may be None)
        p1(InstrumentParticipant): first known participant, or None
        p2(InstrumentParticipant): second known participant, or None
        is_contract(bool): the document is a contract

    Returns:
        ParticipantToken: the recognised mention, or None
    """
    if (t is None):
        return None
    tt = t
    br = False
    if (p1 is None and p2 is None and is_contract):
        r1 = t.getReferent()
        # --- "<ref>, <ref> and <ref> ... СТОРОНА (plural)"
        if ((r1 is not None and t.next0_ is not None
             and t.next0_.is_comma_and)
                and (isinstance(t.next0_.next0_, ReferentToken))):
            r2 = t.next0_.next0_.getReferent()
            if (r1.type_name == r2.type_name):
                ttt = t.next0_.next0_.next0_
                refs = list()
                refs.append(r1)
                refs.append(r2)
                # Collect further referents of the same type.
                first_pass3014 = True
                while True:
                    if first_pass3014:
                        first_pass3014 = False
                    else:
                        ttt = ttt.next0_
                    if (not (ttt is not None)):
                        break
                    if ((ttt.is_comma_and and ttt.next0_ is not None
                         and ttt.next0_.getReferent() is not None)
                            and ttt.next0_.getReferent().type_name == r1.type_name):
                        ttt = ttt.next0_
                        if (not ttt.getReferent() in refs):
                            refs.append(ttt.getReferent())
                        continue
                    break
                # Skip naming filler, then expect a plural "СТОРОНА" phrase.
                first_pass3015 = True
                while True:
                    if first_pass3015:
                        first_pass3015 = False
                    else:
                        ttt = ttt.next0_
                    if (not (ttt is not None)):
                        break
                    if (ttt.is_comma or ttt.morph.class0_.is_preposition):
                        continue
                    if ((ttt.isValue("ИМЕНОВАТЬ", None)
                         or ttt.isValue("ДАЛЬНЕЙШИЙ", None)
                         or ttt.isValue("ДАЛЕЕ", None))
                            or ttt.isValue("ТЕКСТ", None)):
                        continue
                    if (ttt.isValue("ДОГОВАРИВАТЬСЯ", None)):
                        continue
                    npt = NounPhraseHelper.tryParse(
                        ttt, NounPhraseParseAttr.NO, 0)
                    if (npt is not None
                            and npt.noun.isValue("СТОРОНА", None)
                            and npt.morph.number != MorphNumber.SINGULAR):
                        re = ParticipantToken._new1467(
                            t, npt.end_token,
                            ParticipantToken.Kinds.NAMEDASPARTS)
                        re.parts = refs
                        return re
                    break
        # --- single organization / person that is given a role name
        if ((isinstance(r1, OrganizationReferent))
                or (isinstance(r1, PersonReferent))):
            has_br = False
            has_named = False
            if (isinstance(r1, PersonReferent)):
                if (t.previous is not None
                        and t.previous.isValue("ЛИЦО", None)):
                    return None
            elif (t.previous is not None
                  and ((t.previous.isValue("ВЫДАВАТЬ", None)
                        or t.previous.isValue("ВЫДАТЬ", None)))):
                return None
            # First look for the role name inside the referent token itself.
            ttt = (t).begin_token
            while ttt is not None and (ttt.end_char < t.end_char):
                if (ttt.isChar('(')):
                    has_br = True
                elif ((ttt.isValue("ИМЕНОВАТЬ", None)
                       or ttt.isValue("ДАЛЬНЕЙШИЙ", None)
                       or ttt.isValue("ДАЛЕЕ", None))
                      or ttt.isValue("ТЕКСТ", None)):
                    has_named = True
                elif ((ttt.is_comma or ttt.morph.class0_.is_preposition
                       or ttt.is_hiphen) or ttt.isChar(':')):
                    pass
                elif (isinstance(ttt, ReferentToken)):
                    pass
                elif (has_br or has_named):
                    npt = NounPhraseHelper.tryParse(
                        ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
                    if (npt is None):
                        break
                    if (has_br):
                        if (npt.end_token.next0_ is None
                                or not npt.end_token.next0_.isChar(')')):
                            break
                    if (not has_named):
                        if (ParticipantToken.M_ONTOLOGY.tryParse(
                                ttt, TerminParseAttr.NO) is None):
                            break
                    re = ParticipantToken._new1467(
                        t, t, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = npt.getNormalCaseText(
                        None, True, MorphGender.UNDEFINED, False)
                    re.parts = list()
                    re.parts.append(r1)
                    return re
                ttt = ttt.next0_
            # Then look for it in the tokens following the referent.
            has_br = False
            has_named = False
            end_side = None
            brr = None
            add_refs = None
            ttt = t.next0_
            first_pass3016 = True
            while True:
                if first_pass3016:
                    first_pass3016 = False
                else:
                    ttt = ttt.next0_
                if (not (ttt is not None)):
                    break
                if ((isinstance(ttt, NumberToken))
                        and (isinstance(ttt.next0_, TextToken))
                        and (ttt.next0_).term == "СТОРОНЫ"):
                    ttt = ttt.next0_
                    end_side = ttt
                    if (ttt.next0_ is not None and ttt.next0_.is_comma):
                        ttt = ttt.next0_
                    if (ttt.next0_ is not None and ttt.next0_.is_and):
                        break
                # Left the current bracket sequence.
                if (brr is not None and ttt.begin_char > brr.end_char):
                    brr = (None)
                if (BracketHelper.canBeStartOfSequence(ttt, False, False)):
                    brr = BracketHelper.tryParse(
                        ttt, BracketParseAttr.NO, 100)
                    # Short "(...)" sequences are skipped as a whole.
                    if (brr is not None and (brr.length_char < 7)
                            and ttt.isChar('(')):
                        ttt = brr.end_token
                        brr = (None)
                        continue
                elif ((ttt.isValue("ИМЕНОВАТЬ", None)
                       or ttt.isValue("ДАЛЬНЕЙШИЙ", None)
                       or ttt.isValue("ДАЛЕЕ", None))
                      or ttt.isValue("ТЕКСТ", None)):
                    has_named = True
                elif ((ttt.is_comma or ttt.morph.class0_.is_preposition
                       or ttt.is_hiphen) or ttt.isChar(':')):
                    pass
                elif (brr is not None or has_named):
                    if (BracketHelper.canBeStartOfSequence(
                            ttt, True, False)):
                        ttt = ttt.next0_
                    npt = NounPhraseHelper.tryParse(
                        ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
                    typ22 = None
                    if (npt is not None):
                        ttt = npt.end_token
                        if (npt.end_token.isValue("ДОГОВОР", None)):
                            continue
                    else:
                        ttok = None
                        if (isinstance(ttt, MetaToken)):
                            ttok = ParticipantToken.M_ONTOLOGY.tryParse(
                                (ttt).begin_token, TerminParseAttr.NO)
                        if (ttok is not None):
                            typ22 = ttok.termin.canonic_text
                        elif (has_named and ttt.morph.class0_.is_adjective):
                            typ22 = ttt.getNormalCaseText(
                                MorphClass.ADJECTIVE, False,
                                MorphGender.UNDEFINED, False)
                        elif (brr is not None):
                            continue
                        else:
                            break
                    if (BracketHelper.canBeEndOfSequence(
                            ttt.next0_, True, None, False)):
                        ttt = ttt.next0_
                    if (brr is not None):
                        if (ttt.next0_ is None):
                            ttt = brr.end_token
                            continue
                        ttt = ttt.next0_
                    if (not has_named and typ22 is None):
                        # npt can be None here; the original dereferenced it
                        # unconditionally.
                        if (npt is None
                                or ParticipantToken.M_ONTOLOGY.tryParse(
                                    npt.begin_token,
                                    TerminParseAttr.NO) is None):
                            break
                    if (typ22 is None and npt is None):
                        # No source for a role name at all.
                        break
                    re = ParticipantToken._new1467(
                        t, ttt, ParticipantToken.Kinds.NAMEDAS)
                    # Lazy fallback: the noun phrase is only consulted when
                    # typ22 was not determined.
                    if (typ22 is not None):
                        re.typ = typ22
                    else:
                        re.typ = npt.getNormalCaseText(
                            None, True, MorphGender.UNDEFINED, False)
                    re.parts = list()
                    re.parts.append(r1)
                    return re
                elif ((ttt.isValue("ЗАРЕГИСТРИРОВАННЫЙ", None)
                       or ttt.isValue("КАЧЕСТВО", None)
                       or ttt.isValue("ПРОЖИВАЮЩИЙ", None))
                      or ttt.isValue("ЗАРЕГ", None)):
                    pass
                elif (ttt.getReferent() == r1):
                    pass
                elif ((isinstance(ttt.getReferent(), PersonIdentityReferent))
                      or (isinstance(ttt.getReferent(), AddressReferent))):
                    if (add_refs is None):
                        add_refs = list()
                    add_refs.append(ttt.getReferent())
                else:
                    prr = ttt.kit.processReferent("PERSONPROPERTY", ttt)
                    if (prr is not None):
                        ttt = prr.end_token
                        continue
                    if (isinstance(ttt.getReferent(), GeoReferent)):
                        continue
                    npt = NounPhraseHelper.tryParse(
                        ttt, NounPhraseParseAttr.NO, 0)
                    if (npt is not None):
                        if ((npt.noun.isValue("МЕСТО", None)
                             or npt.noun.isValue("ЖИТЕЛЬСТВО", None)
                             or npt.noun.isValue("ПРЕДПРИНИМАТЕЛЬ", None))
                                or npt.noun.isValue("ПОЛ", None)
                                or npt.noun.isValue("РОЖДЕНИЕ", None)):
                            ttt = npt.end_token
                            continue
                    if (ttt.is_newline_before):
                        break
                    if (ttt.length_char < 3):
                        continue
                    mc = ttt.getMorphClassInDictionary()
                    if (mc.is_adverb or mc.is_adjective):
                        continue
                    if (ttt.chars.is_all_upper):
                        continue
                    break
            if (end_side is not None
                    or ((add_refs is not None and t.previous is not None
                         and t.previous.is_and))):
                re = ParticipantToken._new1467(
                    t, Utils.ifNotNull(end_side, t),
                    ParticipantToken.Kinds.NAMEDAS)
                re.typ = (None)
                re.parts = list()
                re.parts.append(r1)
                if (add_refs is not None):
                    re.parts.extend(add_refs)
                return re
        # --- "<role word>[-|:] <person or organization>"
        too = ParticipantToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
        if (too is not None):
            if ((isinstance(t.previous, TextToken))
                    and t.previous.isValue("ЛИЦО", None)):
                too = (None)
        if (too is not None and too.termin.tag is not None
                and too.termin.canonic_text != "СТОРОНА"):
            tt1 = too.end_token.next0_
            if (tt1 is not None):
                if (tt1.is_hiphen or tt1.isChar(':')):
                    tt1 = tt1.next0_
            if (isinstance(tt1, ReferentToken)):
                r1 = tt1.getReferent()
                if ((isinstance(r1, PersonReferent))
                        or (isinstance(r1, OrganizationReferent))):
                    re = ParticipantToken._new1467(
                        t, tt1, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = too.termin.canonic_text
                    re.parts = list()
                    re.parts.append(r1)
                    return re
    # ---------------- generic scan for a role name ----------------
    add_typ1 = (None if p1 is None else p1.typ)
    add_typ2 = (None if p2 is None else p2.typ)
    if (BracketHelper.canBeStartOfSequence(tt, False, False)
            and tt.next0_ is not None):
        br = True
        tt = tt.next0_
    # Multi-word known types (not starting with "СТОРОНА") are matched as
    # termins.
    term1 = None
    term2 = None
    if (add_typ1 is not None and add_typ1.find(' ') > 0
            and not add_typ1.startswith("СТОРОНА")):
        term1 = Termin(add_typ1)
    if (add_typ2 is not None and add_typ2.find(' ') > 0
            and not add_typ2.startswith("СТОРОНА")):
        term2 = Termin(add_typ2)
    named = False
    typ_ = None
    t1 = None
    t0 = tt
    first_pass3017 = True
    while True:
        if first_pass3017:
            first_pass3017 = False
        else:
            tt = tt.next0_
        if (not (tt is not None)):
            break
        if (tt.morph.class0_.is_preposition and typ_ is not None):
            continue
        if (tt.isCharOf("(:)") or tt.is_hiphen):
            continue
        if (tt.is_table_control_char):
            break
        if (tt.is_newline_before and tt != t0):
            if (isinstance(tt, NumberToken)):
                break
            if ((isinstance(tt, TextToken))
                    and (isinstance(tt.previous, TextToken))):
                if (tt.previous.isValue((tt).term, None)):
                    break
        if (BracketHelper.isBracket(tt, False)):
            continue
        tok = (ParticipantToken.M_ONTOLOGY.tryParse(
            tt, TerminParseAttr.NO)
            if ParticipantToken.M_ONTOLOGY is not None else None)
        if (tok is not None and (isinstance(tt.previous, TextToken))):
            if (tt.previous.isValue("ЛИЦО", None)):
                return None
        if (tok is None):
            # Not an ontology word: compare with the already known types.
            if (add_typ1 is not None
                    and ((MiscHelper.isNotMoreThanOneError(add_typ1, tt)
                          or ((((isinstance(tt, MetaToken)))
                               and (tt).begin_token.isValue(add_typ1, None)))))):
                if (typ_ is not None):
                    if (not ParticipantToken.__isTypesEqual(
                            add_typ1, typ_)):
                        break
                typ_ = add_typ1
                t1 = tt
                continue
            if (add_typ2 is not None
                    and ((MiscHelper.isNotMoreThanOneError(add_typ2, tt)
                          or ((((isinstance(tt, MetaToken)))
                               and (tt).begin_token.isValue(add_typ2, None)))))):
                if (typ_ is not None):
                    if (not ParticipantToken.__isTypesEqual(
                            add_typ2, typ_)):
                        break
                typ_ = add_typ2
                t1 = tt
                continue
            if (tt.chars.is_letter):
                if (term1 is not None):
                    tok1 = term1.tryParse(tt, TerminParseAttr.NO)
                    if (tok1 is not None):
                        if (typ_ is not None):
                            if (not ParticipantToken.__isTypesEqual(
                                    add_typ1, typ_)):
                                break
                        typ_ = add_typ1
                        tt = tok1.end_token
                        t1 = tt
                        continue
                if (term2 is not None):
                    tok2 = term2.tryParse(tt, TerminParseAttr.NO)
                    if (tok2 is not None):
                        if (typ_ is not None):
                            if (not ParticipantToken.__isTypesEqual(
                                    add_typ2, typ_)):
                                break
                        typ_ = add_typ2
                        tt = tok2.end_token
                        t1 = tt
                        continue
                if (named and tt.getMorphClassInDictionary().is_noun):
                    if (not tt.chars.is_all_lower
                            or BracketHelper.isBracket(tt.previous, True)):
                        if (DecreeToken.isKeyword(tt, False) is None):
                            val = tt.getNormalCaseText(
                                MorphClass.NOUN, True,
                                MorphGender.UNDEFINED, False)
                            if (typ_ is not None):
                                if (not ParticipantToken.__isTypesEqual(
                                        typ_, val)):
                                    break
                            typ_ = val
                            t1 = tt
                            continue
            if (named and typ_ is None and is_contract):
                if ((isinstance(tt, TextToken))
                        and tt.chars.is_cyrillic_letter
                        and tt.chars.is_capital_upper):
                    dc = tt.getMorphClassInDictionary()
                    if (dc.is_undefined or dc.is_noun):
                        dt = DecreeToken.tryAttach(tt, None, False)
                        ok = True
                        if (dt is not None):
                            ok = False
                        elif (tt.isValue("СТОРОНА", None)):
                            ok = False
                        if (ok):
                            typ_ = (tt).getLemma()
                            t1 = tt
                            continue
                    if (dc.is_adjective):
                        npt = NounPhraseHelper.tryParse(
                            tt, NounPhraseParseAttr.NO, 0)
                        if (npt is not None and len(npt.adjectives) > 0
                                and npt.noun.getMorphClassInDictionary(
                                ).is_noun):
                            typ_ = npt.getNormalCaseText(
                                None, True, MorphGender.UNDEFINED, False)
                            t1 = npt.end_token
                            continue
            if (tt == t):
                break
            if ((isinstance(tt, NumberToken)) or tt.isChar('.')):
                break
            if (tt.length_char < 4):
                if (typ_ is not None):
                    continue
            break
        if (tok.termin.tag is None):
            # Ontology word without a tag switches on "named" mode.
            named = True
        else:
            if (typ_ is not None):
                break
            if (tok.termin.canonic_text == "СТОРОНА"):
                # "СТОРОНА[-]<number>" gives the type "СТОРОНА <number>".
                tt1 = tt.next0_
                if (tt1 is not None and tt1.is_hiphen):
                    tt1 = tt1.next0_
                if (not ((isinstance(tt1, NumberToken)))):
                    break
                if (tt1.is_newline_before):
                    break
                typ_ = "{0} {1}".format(tok.termin.canonic_text,
                                        (tt1).value)
                t1 = tt1
            else:
                typ_ = tok.termin.canonic_text
                t1 = tok.end_token
            break
        tt = tok.end_token
    if (typ_ is None):
        return None
    if (not named and t1 != t and not typ_.startswith("СТОРОНА")):
        if (not ParticipantToken.__isTypesEqual(typ_, add_typ1)
                and not ParticipantToken.__isTypesEqual(typ_, add_typ2)):
            return None
    # Pull surrounding brackets / quotes into the span.
    if (BracketHelper.canBeEndOfSequence(t1.next0_, False, None, False)):
        t1 = t1.next0_
        if (not t.is_whitespace_before
                and BracketHelper.canBeStartOfSequence(
                    t.previous, False, False)):
            t = t.previous
    elif (BracketHelper.canBeStartOfSequence(t, False, False)
          and BracketHelper.canBeEndOfSequence(t1.next0_, True, t, True)):
        t1 = t1.next0_
    if (br and t1.next0_ is not None
            and BracketHelper.canBeEndOfSequence(
                t1.next0_, False, None, False)):
        t1 = t1.next0_
    res = ParticipantToken._new1472(
        t, t1,
        (ParticipantToken.Kinds.NAMEDAS
         if named else ParticipantToken.Kinds.PURE), typ_)
    if (t.isChar(':')):
        res.begin_token = t.next0_
    return res
def attachFirst(self, p: 'InstrumentParticipant', min_char: int, max_char: int) -> 'ReferentToken':
    """Collect the referents around this token into participant ``p``.

    The method scans backwards from ``self.begin_token`` (never before
    ``min_char``) and forwards from ``self.end_token`` (never past
    ``max_char``), adding every organization / phone / person / person
    property / address / URI / identity / bank-data referent it meets to
    ``p`` as an ``ATTR_REF`` or ``ATTR_DELEGATE`` slot.  Tokens accepted by
    ``__tryAttachContractGround`` are absorbed as well.

    Args:
        p: participant referent that receives the slots.
        min_char: lowest ``begin_char`` the backward scan may reach.
        max_char: highest ``begin_char`` the forward scan may reach;
            ``0`` disables the limit.

    Returns:
        A ``ReferentToken`` for ``p`` covering everything that was attached.
    """
    # Referent classes that are attached to the participant.
    ref_kinds = (OrganizationReferent, PhoneReferent, PersonReferent,
                 PersonPropertyReferent, AddressReferent, UriReferent,
                 PersonIdentityReferent, BankDataReferent)
    person_kinds = (PersonReferent, PersonPropertyReferent)

    # --- backward scan: referents standing before the participant keyword ---
    tt0 = self.begin_token
    refs = []
    t = tt0.previous
    first_pass3019 = True
    while True:
        if first_pass3019:
            first_pass3019 = False
        else:
            t = t.previous
        if not (t is not None and t.begin_char >= min_char):
            break
        if t.is_newline_after:
            if t.newlines_after_count > 1:
                break
            if isinstance(t.next0_, NumberToken):
                break
        tt = ParticipantToken.__tryAttachContractGround(t, p, False)
        if tt is not None:
            continue
        r = t.getReferent()
        if isinstance(r, ref_kinds):
            # NOTE(review): nesting reconstructed from collapsed source -
            # the start is moved only when a new referent is recorded.
            if r not in refs:
                refs.insert(0, r)
                tt0 = t

    # A person following a leading organization is that organization's delegate.
    for r in refs:
        if (r != refs[0] and isinstance(refs[0], OrganizationReferent)
                and isinstance(r, person_kinds)):
            p.addSlot(InstrumentParticipant.ATTR_DELEGATE, r, False, 0)
        else:
            p.addSlot(InstrumentParticipant.ATTR_REF, r, False, 0)

    rt = ReferentToken(p, tt0, self.end_token)

    # --- forward scan: referents standing after the participant keyword ---
    t = self.end_token.next0_
    if BracketHelper.isBracket(t, False):
        t = t.next0_
    if t is not None and t.isChar(','):
        t = t.next0_
    first_pass3020 = True
    while True:
        if first_pass3020:
            first_pass3020 = False
        else:
            t = t.next0_
        if not (t is not None and (max_char == 0 or t.begin_char <= max_char)):
            break
        if t.isValue("СТОРОНА", None):
            break
        r = t.getReferent()
        if isinstance(r, ref_kinds):
            # "<property>, <person>" on one line: fold the property into the person.
            if (isinstance(r, PersonPropertyReferent)
                    and t.next0_ is not None and t.next0_.is_comma
                    and isinstance(t.next0_.next0_, ReferentToken)
                    and isinstance(t.next0_.next0_.getReferent(), PersonReferent)
                    and not t.next0_.is_newline_after):
                pe = Utils.asObjectOrNull(t.next0_.next0_.getReferent(), PersonReferent)
                pe.addSlot(PersonReferent.ATTR_ATTR, r, False, 0)
                r = pe
                t = t.next0_.next0_
            is_delegate = False
            if t.previous.isValue("ЛИЦО", None) or t.previous.isValue("ИМЯ", None):
                is_delegate = True
            if (t.previous.isValue("КОТОРЫЙ", None) and t.previous.previous is not None
                    and (t.previous.previous.isValue("ИМЯ", None)
                         or t.previous.previous.isValue("ЛИЦО", None))):
                is_delegate = True
            if isinstance(r, person_kinds) and is_delegate:
                slot_name = InstrumentParticipant.ATTR_DELEGATE
            else:
                slot_name = InstrumentParticipant.ATTR_REF
            p.addSlot(slot_name, r, False, 0)
            rt.end_token = t
            continue
        tt = ParticipantToken.__tryAttachContractGround(t, p, False)
        if tt is not None:
            rt.end_token = tt
            t = rt.end_token
            if rt.begin_char == tt.begin_char:
                rt.begin_token = tt
            continue
        # Two-word connectors that are skipped as a pair.
        if t.isValue("В", None) and t.next0_ is not None and t.next0_.isValue("ЛИЦО", None):
            t = t.next0_
            continue
        if t.isValue("ОТ", None) and t.next0_ is not None and t.next0_.isValue("ИМЯ", None):
            t = t.next0_
            continue
        if t.isValue("ПО", None) and t.next0_ is not None and t.next0_.isValue("ПОРУЧЕНИЕ", None):
            t = t.next0_
            continue
        if t.is_newline_before:
            break
        if t.getMorphClassInDictionary() == MorphClass.VERB:
            # Only these verbs are allowed inside a participant description.
            if (not t.isValue("УДОСТОВЕРЯТЬ", None) and not t.isValue("ПРОЖИВАТЬ", None)
                    and not t.isValue("ЗАРЕГИСТРИРОВАТЬ", None)
                    and not t.isValue("ДЕЙСТВОВАТЬ", None)):
                break
        if t.is_and and t.previous is not None and t.previous.is_comma:
            break
        # Fix: guard t.next0_ - a trailing conjunction used to raise AttributeError.
        if t.is_and and t.next0_ is not None and t.next0_.getReferent() is not None:
            if isinstance(t.next0_.getReferent(), OrganizationReferent):
                break
            pe = Utils.asObjectOrNull(t.next0_.getReferent(), PersonReferent)
            if pe is not None:
                # Stop before a person carrying an attribute that starts
                # with "индивидуальный предприниматель".
                has_ip = False
                for s in pe.slots:
                    if s.type_name == PersonReferent.ATTR_ATTR:
                        if str(s.value).startswith("индивидуальный предприниматель"):
                            has_ip = True
                            break
                if has_ip:
                    break

    # --- final pass over the covered span with the third argument set to True ---
    t = rt.begin_token
    while t is not None and t.end_char <= rt.end_char:
        tt = ParticipantToken.__tryAttachContractGround(t, p, True)
        if tt is not None:
            if tt.end_char > rt.end_char:
                rt.end_token = tt
            t = tt
        t = t.next0_
    return rt
def tryParse(t: 'Token', loc_onto: 'IntOntologyCollection') -> 'NamedItemToken':
    """Try to read one named-entity item starting at token ``t``.

    The checks run in a fixed order; the first one that matches decides:
    an already recognised referent, a type word, a well-known name, a
    compass-direction prefix, a capitalised adjective + type phrase, a
    bracketed name and finally any sufficiently long non-lowercase word.

    Args:
        t: first token to analyse; ``None`` yields ``None``.
        loc_onto: passed through unchanged to the recursive calls.

    Returns:
        The parsed ``NamedItemToken`` or ``None`` when nothing fits.
    """
    if t is None:
        return None

    # 1. A referent token is wrapped only for the supported referent kinds.
    if isinstance(t, ReferentToken):
        referent = t.getReferent()
        if (referent.type_name == "PERSON" or referent.type_name == "PERSONPROPERTY"
                or isinstance(referent, GeoReferent) or referent.type_name == "ORGANIZATION"):
            return NamedItemToken._new1635(t, t, referent, t.morph)
        return None

    type_tok = NamedItemToken.__m_types.tryParse(t, TerminParseAttr.NO)
    name_tok = NamedItemToken.__m_names.tryParse(t, TerminParseAttr.NO)

    # 2. A type word, possibly coinciding with a known name of the same kind.
    if type_tok is not None:
        if not isinstance(t, TextToken):
            return None
        item = NamedItemToken._new1636(type_tok.begin_token, type_tok.end_token,
                                       type_tok.morph, type_tok.chars)
        item.kind = Utils.valToEnum(type_tok.termin.tag, NamedEntityKind)
        item.type_value = type_tok.termin.canonic_text
        if (name_tok is not None and name_tok.end_token == type_tok.end_token
                and not t.chars.is_all_lower
                and Utils.valToEnum(name_tok.termin.tag, NamedEntityKind) == item.kind):
            item.name_value = name_tok.termin.canonic_text
            item.is_wellknown = True
        return item

    # 3. A known name that is not written entirely in lower case.
    if name_tok is not None:
        if name_tok.begin_token.chars.is_all_lower:
            return None
        item = NamedItemToken._new1636(name_tok.begin_token, name_tok.end_token,
                                       name_tok.morph, name_tok.chars)
        item.kind = Utils.valToEnum(name_tok.termin.tag, NamedEntityKind)
        item.name_value = name_tok.termin.canonic_text
        # The name counts as well-known only when it is not glued to its
        # neighbours (a following punctuation mark plus whitespace is fine).
        if not t.is_whitespace_before and t.previous is not None:
            standalone = False
        elif not t.is_whitespace_after and t.next0_ is not None:
            standalone = t.next0_.isCharOf(",.;!?") and t.next0_.is_whitespace_after
        else:
            standalone = True
        if standalone:
            item.is_wellknown = True
            item.type_value = Utils.asObjectOrNull(name_tok.termin.tag2, str)
        return item

    # 4. Compass-direction prefix recognised by tryAttachNordWest.
    direction = MiscLocationHelper.tryAttachNordWest(t)
    if direction is not None:
        if direction.morph.class0_.is_noun:
            if not direction.end_token.isValue("ВОСТОК", None):
                return None
            if direction.begin_token == direction.end_token:
                return None
            loc = NamedItemToken._new1638(t, direction.end_token, direction.morph)
            loc.kind = NamedEntityKind.LOCATION
            loc.name_value = MiscHelper.getTextValue(
                t, direction.end_token, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
            loc.is_wellknown = True
            return loc
        if direction.whitespaces_after_count > 2:
            return None
        following = direction.end_token.next0_
        if isinstance(following, ReferentToken) and isinstance(following.getReferent(), GeoReferent):
            loc = NamedItemToken._new1638(t, following, following.morph)
            loc.kind = NamedEntityKind.LOCATION
            loc.name_value = MiscHelper.getTextValue(
                t, following, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
            loc.is_wellknown = True
            loc.ref = following.getReferent()
            return loc
        item = NamedItemToken.tryParse(following, loc_onto)
        if item is not None and item.kind == NamedEntityKind.LOCATION:
            adj_text = direction.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                                   item.morph.gender, False)
            if adj_text is not None:
                if item.name_value is None:
                    item.name_value = adj_text.upper()
                else:
                    item.name_value = "{0} {1}".format(adj_text.upper(), item.name_value)
                    item.type_value = None
                item.begin_token = t
                item.chars = t.chars
                item.is_wellknown = True
                return item

    # 5. Capitalised adjective(s) + a noun that parses as a typed item.
    if t.chars.is_capital_upper and not MiscHelper.canBeStartOfSentence(t):
        phrase = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
        if phrase is not None and len(phrase.adjectives) > 0:
            head = NamedItemToken.tryParse(phrase.noun.begin_token, loc_onto)
            if (head is not None and head.end_token == phrase.end_token
                    and head.type_value is not None):
                head.begin_token = t
                buf = io.StringIO()
                for adjective in phrase.adjectives:
                    adj_text = adjective.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                                           head.morph.gender, False)
                    if buf.tell() > 0:
                        print(' ', end="", file=buf)
                    print(adj_text, end="", file=buf)
                head.name_value = Utils.toStringStringIO(buf)
                head.chars = t.chars
                if head.kind == NamedEntityKind.LOCATION:
                    head.is_wellknown = True
                return head

    # 6. A bracketed sequence whose first token is a non-lowercase letter token.
    if (BracketHelper.isBracket(t, True) and t.next0_ is not None
            and t.next0_.chars.is_letter and not t.next0_.chars.is_all_lower):
        br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
        if br is not None:
            item = NamedItemToken(t, br.end_token)
            item.is_in_bracket = True
            item.name_value = MiscHelper.getTextValue(t, br.end_token, GetTextAttr.NO)
            name_tok = NamedItemToken.__m_names.tryParse(t.next0_, TerminParseAttr.NO)
            if name_tok is not None and name_tok.end_token == br.end_token.previous:
                item.kind = Utils.valToEnum(name_tok.termin.tag, NamedEntityKind)
                item.is_wellknown = True
                item.name_value = name_tok.termin.canonic_text
            return item

    # 7. Fallback: any non-lowercase word longer than two characters.
    if (isinstance(t, TextToken) and t.chars.is_letter
            and not t.chars.is_all_lower and t.length_char > 2):
        item = NamedItemToken._new1638(t, t, t.morph)
        word = t.term
        if word.endswith(("О", "И", "Ы")):
            # Words with these endings are kept exactly as written.
            item.name_value = word
        else:
            item.name_value = t.getNormalCaseText(None, False, MorphGender.UNDEFINED, False)
        item.chars = t.chars
        # "Word-Word" without spaces and in the same alphabet is one name.
        if (not t.is_whitespace_after and t.next0_ is not None and t.next0_.is_hiphen
                and isinstance(t.next0_.next0_, TextToken)
                and not t.next0_.next0_.is_whitespace_after
                and t.chars.is_cyrillic_letter == t.next0_.next0_.chars.is_cyrillic_letter):
            item.end_token = t.next0_.next0_
            t = item.end_token
            item.name_value = "{0}-{1}".format(
                item.name_value,
                t.getNormalCaseText(None, False, MorphGender.UNDEFINED, False))
        return item

    return None
def __TryAttach(t: 'Token', prev: typing.List['DateItemToken']) -> 'DateItemToken':
    """Try to read a single date item starting at token ``t``.

    Depending on the text the result is typed as NUMBER, YEAR, HOUR,
    MINUTE, SECOND, CENTURY, QUARTAL, HALFYEAR, MONTH, DELIM or POINTER.

    Args:
        t: first token to analyse; ``None`` yields ``None``.
        prev: items already parsed before ``t`` (may be ``None``).  In the
            "<hour><delim><minute> ЧАС" case ``prev[0].typ`` is changed
            to HOUR as a side effect.

    Returns:
        The parsed ``DateItemToken`` or ``None``.
    """
    from pullenti.ner.measure.internal.MeasureToken import MeasureToken
    if t is None:
        return None
    nt = Utils.asObjectOrNull(t, NumberToken)
    begin = t
    end = t
    # A number enclosed in brackets is treated as the number itself.
    if (BracketHelper.canBeStartOfSequence(t, False, False) and t.next0_ is not None
            and isinstance(t.next0_, NumberToken)
            and BracketHelper.canBeEndOfSequence(t.next0_.next0_, False, None, False)):
        nt = Utils.asObjectOrNull(t.next0_, NumberToken)
        end = t.next0_.next0_
    if (t.is_newline_before and BracketHelper.isBracket(t, False)
            and isinstance(t.next0_, NumberToken)
            and BracketHelper.isBracket(t.next0_.next0_, False)):
        nt = Utils.asObjectOrNull(t.next0_, NumberToken)
        end = t.next0_.next0_

    # ------------------------------------------------------------------
    # Numeric items
    # ------------------------------------------------------------------
    if nt is not None:
        if nt.int_value is None:
            return None
        if nt.typ == NumberSpellingType.WORDS:
            # A number word that is a pure noun is accepted only before
            # a quarter / half-year word.
            if nt.morph.class0_.is_noun and not nt.morph.class0_.is_adjective:
                if (t.next0_ is not None
                        and (t.next0_.isValue("КВАРТАЛ", None)
                             or t.next0_.isValue("ПОЛУГОДИЕ", None)
                             or t.next0_.isValue("ПІВРІЧЧЯ", None))):
                    pass
                else:
                    return None
        if NumberHelper.tryParseAge(nt) is not None:
            return None
        res = DateItemToken._new653(begin, end, DateItemToken.DateItemType.NUMBER,
                                    nt.int_value, nt.morph)

        # "20 18" split into two numbers right after a month -> year 2018.
        if (res.int_value == 20 and isinstance(nt.next0_, NumberToken)
                and nt.next0_.int_value is not None
                and nt.next0_.length_char == 2 and prev is not None):
            num = 2000 + nt.next0_.int_value
            if (num < 2030 and len(prev) > 0
                    and prev[-1].typ == DateItemToken.DateItemType.MONTH):
                ok = False
                if nt.whitespaces_after_count == 1:
                    ok = True
                elif nt.is_newline_after:
                    ok = True
                if ok:
                    nt = Utils.asObjectOrNull(nt.next0_, NumberToken)
                    res.end_token = nt
                    res.int_value = num

        # Form template "20__ г." / "201_ г.": a year with value 0.
        if res.int_value == 20 or res.int_value == 201:
            tt = t.next0_
            if tt is not None and tt.isChar('_'):
                while tt is not None:
                    if not tt.isChar('_'):
                        break
                    tt = tt.next0_
                tt = DateItemToken.__testYearRusWord(tt, False)
                if tt is not None:
                    res.int_value = 0
                    res.end_token = tt
                    res.typ = DateItemToken.DateItemType.YEAR
                    return res

        # Hours with a part-of-day word (morning / evening / day / night).
        if res.int_value <= 12 and t.next0_ is not None and t.whitespaces_after_count < 3:
            tt = t.next0_
            if tt.isValue("ЧАС", None):
                if (isinstance(t.previous, TextToken) and not t.previous.chars.is_letter
                        and not t.is_whitespace_before
                        and isinstance(t.previous.previous, NumberToken)
                        and not t.previous.is_whitespace_before):
                    # "<number><sign><number> ЧАС" without spaces - not an hour here.
                    pass
                else:
                    res.typ = DateItemToken.DateItemType.HOUR
                    res.end_token = tt
                    tt = tt.next0_
                    if tt is not None and tt.isChar('.'):
                        res.end_token = tt
                        tt = tt.next0_
            first_pass2816 = True
            while True:
                if first_pass2816:
                    first_pass2816 = False
                else:
                    tt = tt.next0_
                if tt is None:
                    break
                if tt.isValue("УТРО", "РАНОК"):
                    res.end_token = tt
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if tt.isValue("ВЕЧЕР", "ВЕЧІР"):
                    res.end_token = tt
                    res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if tt.isValue("ДЕНЬ", None):
                    res.end_token = tt
                    if res.int_value < 10:
                        res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if tt.isValue("НОЧЬ", "НІЧ"):
                    res.end_token = tt
                    if res.int_value == 12:
                        res.int_value = 0
                    elif res.int_value > 9:
                        res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if tt.is_comma or tt.morph.class0_.is_adverb:
                    continue
                break
            if res.typ == DateItemToken.DateItemType.HOUR:
                return res

        # Year detection by a following year word.
        can_be_year_ = True
        if (prev is not None and len(prev) > 0
                and prev[-1].typ == DateItemToken.DateItemType.MONTH):
            pass
        elif (prev is not None and len(prev) >= 4
              and prev[-1].typ == DateItemToken.DateItemType.DELIM
              and prev[-2].can_by_month):
            pass
        elif (nt.next0_ is not None
              and (nt.next0_.isValue("ГОД", None) or nt.next0_.isValue("РІК", None))):
            if res.int_value < 1000:
                can_be_year_ = False
        tt = DateItemToken.__testYearRusWord(nt.next0_, False)
        if tt is not None and DateItemToken.__isNewAge(tt.next0_):
            res.typ = DateItemToken.DateItemType.YEAR
            res.end_token = tt
        elif can_be_year_:
            if res.can_be_year:
                tt = DateItemToken.__testYearRusWord(nt.next0_, res.is_newline_before)
                if tt is not None:
                    if (tt.isValue("Г", None) and not tt.is_whitespace_before
                            and t.previous is not None
                            and (t.previous.isValue("КОРПУС", None)
                                 or t.previous.isValue("КОРП", None))):
                        # "корп. 5г" - a building number, not a year.
                        pass
                    elif (nt.next0_.isValue("Г", None) and t.whitespaces_before_count < 3
                          and t.previous is not None and t.previous.isValue("Я", None)
                          and t.previous.previous is not None
                          and t.previous.previous.isCharOf("\\/")
                          and t.previous.previous.previous is not None
                          and t.previous.previous.previous.isValue("А", None)):
                        # "а/я <number> г" is rejected outright.
                        return None
                    else:
                        res.end_token = tt
                        res.typ = DateItemToken.DateItemType.YEAR
                        res.lang = tt.morph.language
            elif (tt is not None and nt.whitespaces_after_count < 2
                  and (nt.end_char - nt.begin_char) == 1):
                # A two-character number immediately followed by a year word.
                res.end_token = tt
                res.typ = DateItemToken.DateItemType.YEAR
                res.lang = tt.morph.language

        # A leading preposition may confirm the year and joins the item.
        if nt.previous is not None:
            if (nt.previous.isValue("В", "У") or nt.previous.isValue("К", None)
                    or nt.previous.isValue("ДО", None)):
                tt = DateItemToken.__testYearRusWord(nt.next0_, False)
                if tt is not None:
                    ok = False
                    if (res.int_value < 100 and isinstance(tt, TextToken)
                            and (tt.term == "ГОДА" or tt.term == "РОКИ")):
                        pass
                    else:
                        ok = True
                        if nt.previous.isValue("ДО", None) and nt.next0_.isValue("Г", None):
                            # "до 5 г" may be a measure: look up to 10 tokens
                            # back for a MeasureToken that covers the number.
                            cou = 0
                            ttt = nt.previous.previous
                            while ttt is not None and cou < 10:
                                mt = MeasureToken.tryParse(ttt, None, False, False)
                                if mt is not None and mt.end_char > nt.end_char:
                                    ok = False
                                    break
                                ttt = ttt.previous
                                cou += 1
                    if ok:
                        res.end_token = tt
                        res.typ = DateItemToken.DateItemType.YEAR
                        res.lang = tt.morph.language
                        res.begin_token = nt.previous
            elif ((nt.previous.isValue("IN", None) or nt.previous.isValue("SINCE", None))
                  and res.can_be_year):
                res.typ = DateItemToken.DateItemType.YEAR
                res.begin_token = nt.previous
            elif nt.previous.isValue("NEL", None) or nt.previous.isValue("DEL", None):
                if res.can_be_year:
                    res.typ = DateItemToken.DateItemType.YEAR
                    res.lang = MorphLang.IT
                    res.begin_token = nt.previous
            elif nt.previous.isValue("IL", None) and res.can_be_day:
                res.lang = MorphLang.IT
                res.begin_token = nt.previous

        # A unit word right after the number fixes the item type.
        t1 = res.end_token.next0_
        if t1 is not None:
            if t1.isValue("ЧАС", None) or t1.isValue("ГОДИНА", None):
                # NOTE(review): the upper bound "< 59" excludes 59 minutes -
                # possibly "< 60" was meant; kept as in the original.
                if (prev is not None and len(prev) == 2 and prev[0].can_be_hour
                        and prev[1].typ == DateItemToken.DateItemType.DELIM
                        and not prev[1].is_whitespace_after
                        and res.int_value >= 0 and res.int_value < 59):
                    prev[0].typ = DateItemToken.DateItemType.HOUR
                    res.typ = DateItemToken.DateItemType.MINUTE
                    res.end_token = t1
                elif res.int_value < 24:
                    if t1.next0_ is not None and t1.next0_.isChar('.'):
                        t1 = t1.next0_
                    res.typ = DateItemToken.DateItemType.HOUR
                    res.end_token = t1
            elif (res.int_value < 60
                  and (t1.isValue("МИНУТА", None) or t1.isValue("МИН", None)
                       # Fix: the unit word is t1, not the number token t.
                       or t1.isValue("ХВИЛИНА", None))):
                if t1.next0_ is not None and t1.next0_.isChar('.'):
                    t1 = t1.next0_
                res.typ = DateItemToken.DateItemType.MINUTE
                res.end_token = t1
            elif (res.int_value < 60
                  and (t1.isValue("СЕКУНДА", None) or t1.isValue("СЕК", None))):
                if t1.next0_ is not None and t1.next0_.isChar('.'):
                    t1 = t1.next0_
                res.typ = DateItemToken.DateItemType.SECOND
                res.end_token = t1
            elif (res.int_value < 30
                  and (t1.isValue("ВЕК", "ВІК") or t1.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ"))):
                res.typ = DateItemToken.DateItemType.CENTURY
                res.end_token = t1
            elif res.int_value <= 4 and t1.isValue("КВАРТАЛ", None):
                res.typ = DateItemToken.DateItemType.QUARTAL
                res.end_token = t1
            elif (res.int_value <= 2
                  and (t1.isValue("ПОЛУГОДИЕ", None) or t1.isValue("ПІВРІЧЧЯ", None))):
                res.typ = DateItemToken.DateItemType.HALFYEAR
                res.end_token = t1
        return res

    # ------------------------------------------------------------------
    # Textual items
    # ------------------------------------------------------------------
    t0 = Utils.asObjectOrNull(t, TextToken)
    if t0 is None:
        return None
    txt = t0.getSourceText()

    # Roman numerals (Latin I/X/V or Cyrillic Х) before quarter/half-year/century.
    if txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х' or txt[0] == 'V':
        lat = NumberHelper.tryParseRoman(t)
        if (lat is not None and lat.end_token.next0_ is not None
                and lat.int_value is not None):
            val = lat.int_value
            tt = lat.end_token.next0_
            if tt.isValue("КВАРТАЛ", None) and val > 0 and val <= 4:
                return DateItemToken._new654(t, tt, DateItemToken.DateItemType.QUARTAL, val)
            if tt.isValue("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0 and val <= 2:
                return DateItemToken._new654(t, lat.end_token.next0_,
                                             DateItemToken.DateItemType.HALFYEAR, val)
            if tt.isValue("ВЕК", "ВІК") or tt.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ"):
                return DateItemToken._new654(t, lat.end_token.next0_,
                                             DateItemToken.DateItemType.CENTURY, val)
            if tt.isValue("В", None) and tt.next0_ is not None and tt.next0_.isChar('.'):
                # "XX в." is a century only after a pointer or before an era mark.
                if (prev is not None and len(prev) > 0
                        and prev[-1].typ == DateItemToken.DateItemType.POINTER):
                    return DateItemToken._new654(t, tt.next0_,
                                                 DateItemToken.DateItemType.CENTURY, val)
                if DateItemToken.__isNewAge(tt.next0_.next0_):
                    return DateItemToken._new654(t, tt.next0_,
                                                 DateItemToken.DateItemType.CENTURY, val)
            if tt.is_hiphen:
                # "XIX-XX век": return the first numeral as a century.
                lat2 = NumberHelper.tryParseRoman(tt.next0_)
                if (lat2 is not None and lat2.int_value is not None
                        and lat2.int_value > val and lat2.end_token.next0_ is not None):
                    if (lat2.end_token.next0_.isValue("ВЕК", "ВІК")
                            or lat2.end_token.next0_.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                        return DateItemToken._new654(t, lat.end_token,
                                                     DateItemToken.DateItemType.CENTURY, val)

    if t is not None and t.isValue("НАПРИКІНЦІ", None):
        return DateItemToken._new660(t, t, DateItemToken.DateItemType.POINTER, "конец")
    if t is not None and t.isValue("ДОНЕДАВНА", None):
        return DateItemToken._new660(t, t, DateItemToken.DateItemType.POINTER, "сегодня")

    # Seasons; Russian "summer" is accepted only in three exact word forms.
    tok = DateItemToken.M_SEASONS.tryParse(t, TerminParseAttr.NO)
    if (tok is not None
            and Utils.valToEnum(tok.termin.tag, DatePointerType) == DatePointerType.SUMMER
            and t.morph.language.is_ru and isinstance(t, TextToken)):
        str0_ = t.term
        if str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО":
            tok = None
    if tok is not None:
        return DateItemToken._new654(t, tok.end_token, DateItemToken.DateItemType.POINTER,
                                     Utils.valToEnum(tok.termin.tag, DatePointerType))

    npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
    if npt is not None:
        tok = DateItemToken.M_SEASONS.tryParse(npt.end_token, TerminParseAttr.NO)
        if (tok is not None
                and Utils.valToEnum(tok.termin.tag, DatePointerType) == DatePointerType.SUMMER
                and t.morph.language.is_ru and isinstance(t, TextToken)):
            str0_ = t.term
            if str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО":
                tok = None
        if tok is not None:
            return DateItemToken._new654(t, tok.end_token, DateItemToken.DateItemType.POINTER,
                                         Utils.valToEnum(tok.termin.tag, DatePointerType))
        typ_ = DateItemToken.DateItemType.NUMBER
        if npt.noun.isValue("КВАРТАЛ", None):
            typ_ = DateItemToken.DateItemType.QUARTAL
        elif (npt.end_token.isValue("ПОЛУГОДИЕ", None)
              or npt.end_token.isValue("ПІВРІЧЧЯ", None)):
            typ_ = DateItemToken.DateItemType.HALFYEAR
        elif npt.end_token.isValue("НАЧАЛО", None) or npt.end_token.isValue("ПОЧАТОК", None):
            return DateItemToken._new660(t, npt.end_token,
                                         DateItemToken.DateItemType.POINTER, "начало")
        elif npt.end_token.isValue("СЕРЕДИНА", None):
            return DateItemToken._new660(t, npt.end_token,
                                         DateItemToken.DateItemType.POINTER, "середина")
        elif (npt.end_token.isValue("КОНЕЦ", None) or npt.end_token.isValue("КІНЕЦЬ", None)
              or npt.end_token.isValue("НАПРИКІНЕЦЬ", None)):
            return DateItemToken._new660(t, npt.end_token,
                                         DateItemToken.DateItemType.POINTER, "конец")
        elif (npt.end_token.isValue("ВРЕМЯ", None) and len(npt.adjectives) > 0
              and npt.end_token.previous.isValue("НАСТОЯЩЕЕ", None)):
            return DateItemToken._new660(t, npt.end_token,
                                         DateItemToken.DateItemType.POINTER, "сегодня")
        elif (npt.end_token.isValue("ЧАС", None) and len(npt.adjectives) > 0
              and npt.end_token.previous.isValue("ДАНИЙ", None)):
            return DateItemToken._new660(t, npt.end_token,
                                         DateItemToken.DateItemType.POINTER, "сегодня")
        if typ_ != DateItemToken.DateItemType.NUMBER:
            # "last / previous / next quarter (half-year)".
            delta = 0
            if len(npt.adjectives) > 0:
                if (npt.adjectives[0].isValue("ПОСЛЕДНИЙ", None)
                        or npt.adjectives[0].isValue("ОСТАННІЙ", None)):
                    return DateItemToken._new654(
                        t0, npt.end_token, typ_,
                        (4 if typ_ == DateItemToken.DateItemType.QUARTAL else 2))
                if (npt.adjectives[0].isValue("ПРЕДЫДУЩИЙ", None)
                        or npt.adjectives[0].isValue("ПОПЕРЕДНІЙ", None)):
                    delta = -1
                elif (npt.adjectives[0].isValue("СЛЕДУЮЩИЙ", None)
                      or npt.adjectives[0].isValue("ПОСЛЕДУЮЩИЙ", None)
                      or npt.adjectives[0].isValue("НАСТУПНИЙ", None)):
                    delta = 1
                else:
                    return None
            # Resolve relative to the nearest preceding date range that
            # carries a quarter / half-year number.
            # NOTE(review): ``cou`` is never incremented, so the
            # ``cou > 200`` limit never triggers - confirm whether a
            # bounded look-back was intended.
            cou = 0
            tt = t.previous
            first_pass2817 = True
            while True:
                if first_pass2817:
                    first_pass2817 = False
                else:
                    tt = tt.previous
                if tt is None:
                    break
                if cou > 200:
                    break
                dr = Utils.asObjectOrNull(tt.getReferent(), DateRangeReferent)
                if dr is None:
                    continue
                if typ_ == DateItemToken.DateItemType.QUARTAL:
                    ii = dr.quarter_number
                    if ii < 1:
                        continue
                    ii += delta
                    if ii < 1 or ii > 4:
                        continue
                    return DateItemToken._new654(t0, npt.end_token, typ_, ii)
                if typ_ == DateItemToken.DateItemType.HALFYEAR:
                    ii = dr.halfyear_number
                    if ii < 1:
                        continue
                    ii += delta
                    if ii < 1 or ii > 2:
                        continue
                    return DateItemToken._new654(t0, npt.end_token, typ_, ii)

    term = t0.term
    # Punctuation: only date delimiters are accepted.
    if not str.isalnum(term[0]):
        if t0.isCharOf(".\\/:") or t0.is_hiphen:
            return DateItemToken._new660(t0, t0, DateItemToken.DateItemType.DELIM, term)
        elif t0.isChar(','):
            return DateItemToken._new660(t0, t0, DateItemToken.DateItemType.DELIM, term)
        else:
            return None
    # Letter O (Latin or Cyrillic) glued to one digit is read as that digit.
    if term == "O" or term == "О":
        if (isinstance(t.next0_, NumberToken) and not t.is_whitespace_after
                and len(t.next0_.value) == 1):
            return DateItemToken._new654(t, t.next0_, DateItemToken.DateItemType.NUMBER,
                                         t.next0_.int_value)
    # Month names; an untagged match is retried from the token after it.
    if str.isalpha(term[0]):
        inf = DateItemToken.M_MONTHES.tryParse(t, TerminParseAttr.NO)
        if inf is not None and inf.termin.tag is None:
            inf = DateItemToken.M_MONTHES.tryParse(inf.end_token.next0_, TerminParseAttr.NO)
        if inf is not None and isinstance(inf.termin.tag, int):
            return DateItemToken._new675(inf.begin_token, inf.end_token,
                                         DateItemToken.DateItemType.MONTH,
                                         inf.termin.tag, inf.termin.lang)
    return None
def __TryParse(t: 'Token', prev: 'TransItemToken', after_conj: bool, attach_high: bool = False) -> 'TransItemToken':
    """Try to read one transport-description item starting at token ``t``.

    The result is typed (``TransItemToken.Typs``) as ORG, ROUTE, DATE,
    NUMBER, NOUN, BRAND, MODEL, NAME or CLASS depending on which of the
    checks below matches first.

    Args:
        t: first token to analyse; ``None`` yields ``None``.
        prev: the item parsed just before ``t`` (may be ``None``); several
            branches are enabled only for particular ``prev.typ`` /
            ``prev.kind`` values.
        after_conj: enables the quoted-name and capitalised-name branches
            even when ``prev`` is not a NOUN.
        attach_high: when true, ontology terms marked ``is_doubt`` are
            accepted without the usual check against ``prev``.

    Returns:
        The parsed ``TransItemToken`` or ``None``.
    """
    if (t is None):
        return None
    # Optional leading comma and "belongs to" verb before an organization.
    t1 = t
    if (t1.isChar(',')):
        t1 = t1.next0_
    if (t1 is not None and t1.isValue("ПРИНАДЛЕЖАТЬ", "НАЛЕЖАТИ")):
        t1 = t1.next0_
    if (isinstance(t1, ReferentToken)):
        if (t1.getReferent().type_name == "ORGANIZATION"):
            return TransItemToken._new2521(t, t1, TransItemToken.Typs.ORG, t1.getReferent(), t1.morph)
    # Route: "<verb> <preposition> РЕЙС/МАРШРУТ <geo> - <geo> ...".
    route = False
    if (t1 is not None and ((t1.isValue("СЛЕДОВАТЬ", "СЛІДУВАТИ") or t1.isValue("ВЫПОЛНЯТЬ", "ВИКОНУВАТИ")))):
        t1 = t1.next0_
        route = True
    if (t1 is not None and t1.morph.class0_.is_preposition):
        t1 = t1.next0_
    if (t1 is not None and ((t1.isValue("РЕЙС", None) or t1.isValue("МАРШРУТ", None)))):
        t1 = t1.next0_
        route = True
    if (isinstance(t1, ReferentToken)):
        if (isinstance(t1.getReferent(), GeoReferent)):
            geo_ = Utils.asObjectOrNull(t1.getReferent(), GeoReferent)
            if (geo_.is_state or geo_.is_city):
                tit = TransItemToken._new2522(t, t1, TransItemToken.Typs.ROUTE, list())
                tit.route_items.append(geo_)
                t1 = t1.next0_
                # Collect further states/cities, skipping hyphens,
                # prepositions and conjunctions between them.
                first_pass3132 = True
                while True:
                    if first_pass3132:
                        first_pass3132 = False
                    else:
                        t1 = t1.next0_
                    if (not (t1 is not None)):
                        break
                    if (t1.is_hiphen):
                        continue
                    if (t1.morph.class0_.is_preposition or t1.morph.class0_.is_conjunction):
                        continue
                    geo_ = (Utils.asObjectOrNull(t1.getReferent(), GeoReferent))
                    if (geo_ is None):
                        break
                    if (not geo_.is_city and not geo_.is_state):
                        break
                    tit.route_items.append(geo_)
                    tit.end_token = t1
                # A single place is a route only when a route word was seen.
                if (len(tit.route_items) > 1 or route):
                    return tit
        elif ((isinstance(t1.getReferent(), DateReferent)) and (t1.whitespaces_before_count < 3)):
            # A date, optionally followed by "В." or "ВЫП"/"ВЫПУСК" (with a dot).
            tit = TransItemToken._new2523(t, t1, TransItemToken.Typs.DATE, t1.getReferent())
            if (t1.next0_ is not None):
                if (t1.next0_.isValue("В", None) and t1.next0_.next0_ is not None and t1.next0_.next0_.isChar('.')):
                    tit.end_token = t1.next0_.next0_
                elif (t1.next0_.isValue("ВЫП", None) or t1.next0_.isValue("ВЫПУСК", None)):
                    tit.end_token = t1.next0_
                    if (t1.next0_.next0_ is not None and t1.next0_.next0_.isChar('.')):
                        tit.end_token = t1.next0_.next0_
            return tit
    if (isinstance(t, TextToken)):
        # Explicit number prefix followed by a registration/other number.
        num = MiscHelper.checkNumberPrefix(t)
        if (num is not None):
            tit = TransItemToken.__attachRusAutoNumber(num)
            if (tit is None):
                tit = TransItemToken._attachNumber(num, False)
            if (tit is not None):
                tit.begin_token = t
                return tit
        # Ontology lookup, also after "С"/"C"/"ЗА" and inside brackets.
        tok = TransItemToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
        if (tok is None and ((t.isValue("С", None) or t.isValue("C", None) or t.isValue("ЗА", None)))):
            tok = TransItemToken.M_ONTOLOGY.tryParse(t.next0_, TerminParseAttr.NO)
        if (tok is None and BracketHelper.isBracket(t, True)):
            tok1 = TransItemToken.M_ONTOLOGY.tryParse(t.next0_, TerminParseAttr.NO)
            if (tok1 is not None and BracketHelper.isBracket(tok1.end_token.next0_, True)):
                # Fully bracketed term: extend the span over both brackets.
                tok = tok1
                tok.begin_token = t
                tok.end_token = tok.end_token.next0_
                tok.begin_token = t
            elif (tok1 is not None):
                # Unclosed bracket: accepted for brands only.
                tt = Utils.asObjectOrNull(tok1.termin, TransItemToken.TransTermin)
                if (tt.typ == TransItemToken.Typs.BRAND):
                    tok = tok1
                    tok.begin_token = t
        if (tok is None and t.isValue("МАРКА", None)):
            # "МАРКА <name>": whatever follows is re-typed as a brand.
            res1 = TransItemToken.__TryParse(t.next0_, prev, after_conj, False)
            if (res1 is not None):
                if (res1.typ == TransItemToken.Typs.NAME or res1.typ == TransItemToken.Typs.BRAND):
                    res1.begin_token = t
                    res1.typ = TransItemToken.Typs.BRAND
                    return res1
        if (tok is not None):
            tt = Utils.asObjectOrNull(tok.termin, TransItemToken.TransTermin)
            if (tt.typ == TransItemToken.Typs.NUMBER):
                # A number keyword must be followed by a parsable number.
                tit = TransItemToken.__attachRusAutoNumber(tok.end_token.next0_)
                if (tit is None):
                    tit = TransItemToken._attachNumber(tok.end_token.next0_, False)
                if (tit is not None):
                    tit.begin_token = t
                    return tit
                else:
                    return None
            if (tt.is_doubt and not attach_high):
                # Doubtful terms need a preceding NOUN, or must repeat the
                # preceding brand value.
                if (prev is None or prev.typ != TransItemToken.Typs.NOUN):
                    if ((prev is not None and prev.typ == TransItemToken.Typs.BRAND and tt.typ == TransItemToken.Typs.BRAND)
                            and Utils.compareStrings(tt.canonic_text, prev.value, True) == 0):
                        pass
                    else:
                        return None
            if (tt.canonic_text == "СУДНО"):
                # Plural form is accepted only before an opening bracket/quote.
                if ((((tok.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)):
                    if (not BracketHelper.canBeStartOfSequence(tok.end_token.next0_, False, False)):
                        return None
            tit = TransItemToken._new2524(tok.begin_token, tok.end_token, tt.kind, tt.typ, tt.is_doubt, tok.chars, tok.morph)
            tit.value = tt.canonic_text
            # Nouns are stored lower-case, everything else upper-case.
            if (tit.typ == TransItemToken.Typs.NOUN):
                tit.value = tit.value.lower()
            else:
                tit.value = tit.value.upper()
            return tit
        if (tok is None and t.morph.class0_.is_adjective):
            # Adjective(s) + transport noun; a geo referent found among the
            # words of the phrase may be recorded as the state.
            npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
            if (npt is not None and len(npt.adjectives) > 0):
                state_ = None
                tt = t
                first_pass3133 = True
                while True:
                    if first_pass3133:
                        first_pass3133 = False
                    else:
                        tt = tt.next0_
                    if (not (tt is not None and tt.previous != npt.end_token)):
                        break
                    tok = TransItemToken.M_ONTOLOGY.tryParse(tt, TerminParseAttr.NO)
                    if (tok is None and state_ is None):
                        state_ = tt.kit.processReferent("GEO", tt)
                    if (tok is not None and tok.end_token == npt.end_token):
                        if ((tok.termin).typ == TransItemToken.Typs.NOUN):
                            tit = TransItemToken._new2524(t, tok.end_token, (tok.termin).kind, TransItemToken.Typs.NOUN, (tok.termin).is_doubt, tok.chars, npt.morph)
                            tit.value = (tok.termin).canonic_text.lower()
                            tit.alt_value = npt.getNormalCaseText(None, False, MorphGender.UNDEFINED, False).lower()
                            # Phrases ending in "суд"/"суда" are accepted only
                            # before an opening bracket/quote.
                            if (LanguageHelper.endsWithEx(tit.alt_value, "суд", "суда", None, None)):
                                if (not BracketHelper.canBeStartOfSequence(tok.end_token.next0_, False, False)):
                                    continue
                            if (state_ is not None):
                                if ((state_.referent).is_state):
                                    tit.state = state_
                            return tit
    # "КЛАСС <bracketed text>".
    if (t is not None and t.isValue("КЛАСС", None) and t.next0_ is not None):
        br = BracketHelper.tryParse(t.next0_, BracketParseAttr.NO, 100)
        if (br is not None):
            return TransItemToken._new2526(t, br.end_token, TransItemToken.Typs.CLASS, MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO))
    # A bare digit number can only be a model right after a brand.
    nt = Utils.asObjectOrNull(t, NumberToken)
    if (nt is not None):
        if (prev is None or nt.typ != NumberSpellingType.DIGIT):
            return None
        if (prev.typ == TransItemToken.Typs.BRAND):
            return TransItemToken.__attachModel(t, False, prev)
        else:
            return None
    # Registration number; a doubtful one needs supporting context in prev.
    res = TransItemToken.__attachRusAutoNumber(t)
    if ((res) is not None):
        if (not res.is_doubt):
            return res
        if (prev is not None and prev.typ == TransItemToken.Typs.NOUN and prev.kind == TransportKind.AUTO):
            return res
        if (prev is not None and ((prev.typ == TransItemToken.Typs.BRAND or prev.typ == TransItemToken.Typs.MODEL))):
            return res
    # Model after a brand, optionally separated by a hyphen.
    t1 = t
    if (t.is_hiphen):
        t1 = t.next0_
    if (prev is not None and prev.typ == TransItemToken.Typs.BRAND and t1 is not None):
        tit = TransItemToken.__attachModel(t1, True, prev)
        if (tit is not None):
            tit.begin_token = t
            return tit
    # Quoted text after a noun (or after a conjunction).
    if (prev is not None and ((prev.typ == TransItemToken.Typs.NOUN or after_conj))):
        br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
        if (br is not None and br.is_quote_type):
            tit = TransItemToken.tryParse(br.begin_token.next0_, prev, after_conj, False)
            if (tit is not None and tit.end_token.next0_ == br.end_token):
                # The quotes wrap exactly one recognisable item.
                if (not tit.is_doubt or tit.typ == TransItemToken.Typs.BRAND):
                    tit.begin_token = br.begin_token
                    tit.end_token = br.end_token
                    return tit
            s = MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO)
            if (not Utils.isNullOrEmpty(s) and (len(s) < 30)):
                # Count letters, digits and other non-space characters to
                # decide between a NAME and a MODEL.
                chars_ = 0
                digs = 0
                un = 0
                for c in s:
                    if (not Utils.isWhitespace(c)):
                        if (str.isalpha(c)):
                            chars_ += 1
                        elif (str.isdigit(c)):
                            digs += 1
                        else:
                            un += 1
                if (((digs == 0 and un == 0 and t.next0_.chars.is_capital_upper)) or prev.kind == TransportKind.SHIP or prev.kind == TransportKind.SPACE):
                    return TransItemToken._new2526(br.begin_token, br.end_token, TransItemToken.Typs.NAME, s)
                if (digs > 0 and (chars_ < 5)):
                    return TransItemToken._new2526(br.begin_token, br.end_token, TransItemToken.Typs.MODEL, s.replace(" ", ""))
    if (prev is not None and (((prev.typ == TransItemToken.Typs.NOUN or prev.typ == TransItemToken.Typs.BRAND or prev.typ == TransItemToken.Typs.NAME) or prev.typ == TransItemToken.Typs.MODEL))):
        tit = TransItemToken.__attachModel(t, prev.typ != TransItemToken.Typs.NAME, prev)
        if (tit is not None):
            return tit
    # Unknown capitalised word right after a car noun: treat as a brand
    # unless the PERSON analyzer recognises it.
    if (((prev is not None and prev.typ == TransItemToken.Typs.NOUN and prev.kind == TransportKind.AUTO) and (isinstance(t, TextToken)) and t.chars.is_letter)
            and not t.chars.is_all_lower and (t.whitespaces_before_count < 2)):
        pt = t.kit.processReferent("PERSON", t)
        if (pt is None):
            tit = TransItemToken._new2529(t, t, TransItemToken.Typs.BRAND)
            tit.value = (t).term
            return tit
    # Unquoted proper name after a ship/space noun (or after a conjunction).
    if (((prev is not None and prev.typ == TransItemToken.Typs.NOUN and ((prev.kind == TransportKind.SHIP or prev.kind == TransportKind.SPACE)))) or after_conj):
        if (t.chars.is_capital_upper):
            ok = True
            npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
            if (npt is not None and len(npt.adjectives) > 0):
                ok = False
            else:
                rt = t.kit.processReferent("PERSON", t)
                if (rt is not None):
                    ok = False
            # NOTE(review): the nesting of this surname check could not be
            # recovered unambiguously from the collapsed source - confirm
            # against the upstream file.
            if (t.getMorphClassInDictionary().is_proper_surname):
                if (not t.morph.case_.is_nominative):
                    ok = False
            if (ok):
                # Extend the name over following tokens with the same
                # character class until a gap or a recognisable item.
                t1 = t
                tt = t.next0_
                while tt is not None:
                    if (tt.whitespaces_before_count > 1):
                        break
                    if (tt.chars != t.chars):
                        break
                    tit = TransItemToken.tryParse(tt, None, False, False)
                    if ((tit) is not None):
                        break
                    t1 = tt
                    tt = tt.next0_
                s = MiscHelper.getTextValue(t, t1, GetTextAttr.NO)
                if (s is not None):
                    res1 = TransItemToken._new2530(t, t1, TransItemToken.Typs.NAME, True, s)
                    if (not t1.is_newline_after):
                        # A bracketed text right after the name becomes the
                        # main value; the plain name is kept as alt_value.
                        br = BracketHelper.tryParse(t1.next0_, BracketParseAttr.NO, 100)
                        if (br is not None):
                            res1.end_token = br.end_token
                            res1.alt_value = res1.value
                            res1.value = MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO)
                    return res1
    return None
def parse(t: 'Token', max_char: int = 0, prev: 'InstrToken' = None) -> 'InstrToken':
    """Parse one line-level element starting at ``t`` and classify it.

    The scan walks tokens until the next line start (or ``max_char``) and
    returns as soon as a recognisable element is found.  The element types
    produced here are PERSON, TYP, REGNUMBER, ORGANIZATION, GEO, DATE,
    APPENDIX, APPROVED, DIRECTIVE, QUESTION and, as a fallback, UNDEFINED.

    Args:
        t: first token to analyse; ``None`` yields ``None``.
        max_char: when greater than zero, scanning stops at the first token
            whose ``begin_char`` exceeds this value.
        prev: the previously parsed element, if any.  A PERSON predecessor
            enables an extra person lookup at line start; an APPENDIX
            predecessor suppresses appendix-heading detection.

    Returns:
        The recognised ``InstrToken``, or ``None`` when there is no token to
        parse or the scan stops before consuming any token.
    """
    is_start_of_line = False
    t00 = t
    if (t is not None):
        is_start_of_line = t00.is_newline_before
        # Skip leading table control characters other than chr(0x1F).
        while t is not None:
            if (t.is_table_control_char and not t.isChar(chr(0x1F))):
                if (t.is_newline_after and not is_start_of_line):
                    is_start_of_line = True
                t = t.next0_
            else:
                break
    if (t is None):
        return None
    if (t.is_newline_before):
        is_start_of_line = True
    t0 = t
    t1 = None
    has_word = False
    first_pass2988 = True
    while True:
        # Emulated C-style "for": the advance step runs on every pass but the
        # first, including passes reached through "continue".
        if first_pass2988:
            first_pass2988 = False
        else:
            t = t.next0_
        if (not (t is not None)):
            break
        if (t.is_newline_before and t != t0):
            break
        if (max_char > 0 and t.begin_char > max_char):
            break
        if (is_start_of_line and t == t0):
            if (t.isValue("ГЛАВА", None)):
                next0__ = InstrToken.parse(t.next0_, 0, None)
                if (next0__ is not None and next0__.typ == ILTypes.PERSON):
                    next0__.begin_token = t
                    return next0__
            tt = None
            if ((isinstance(t.getReferent(), PersonReferent))
                    or (isinstance(t.getReferent(), PersonPropertyReferent))
                    or (isinstance(t.getReferent(), InstrumentParticipant))):
                return InstrToken.__correctPerson(
                    InstrToken._new1405(t00, t, ILTypes.PERSON, t))
            is_ref = False
            # NOTE(review): this first branch cannot be taken, because a
            # PersonPropertyReferent already returned above; is_ref therefore
            # stays False.  Kept to preserve the original structure.
            if (isinstance(t.getReferent(), PersonPropertyReferent)):
                tt = t.next0_
                is_ref = True
            elif (prev is not None and prev.typ == ILTypes.PERSON):
                rt = t.kit.processReferent(PersonAnalyzer.ANALYZER_NAME, t)
                if (rt is not None):
                    if (isinstance(rt.referent, PersonReferent)):
                        return InstrToken._new1406(t00, rt.end_token, ILTypes.PERSON)
                    tt = rt.end_token.next0_
            cou = 0
            t11 = (None if tt is None else tt.previous)
            # Look ahead (at most a few lines) for a person referent.
            first_pass2989 = True
            while True:
                if first_pass2989:
                    first_pass2989 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if (tt.is_table_control_char):
                    continue
                re = tt.getReferent()
                if (isinstance(re, PersonReferent)):
                    return InstrToken._new1405(t00, tt, ILTypes.PERSON, tt)
                if (isinstance(re, GeoReferent)):
                    t11 = tt
                    continue
                if (re is not None):
                    break
                if (DecreeToken.isKeyword(tt, False) is not None):
                    break
                if (tt.is_newline_before):
                    cou += 1
                    if ((cou) > 4):
                        break
            if (tt is None and is_ref):
                return InstrToken._new1405(t00, Utils.ifNotNull(t11, t), ILTypes.PERSON, t)
        dt = DecreeToken.tryAttach(t, None, False)
        if (dt is not None):
            if (dt.typ == DecreeToken.ItemType.TYP and not t.chars.is_all_lower):
                if (t != t0):
                    break
                # A type name followed by a pure verb on the same line is
                # not reported as a TYP element.
                has_verb_ = False
                tt = dt.end_token
                while tt is not None:
                    if (tt.is_newline_before):
                        break
                    elif ((isinstance(tt, TextToken)) and (tt).is_pure_verb):
                        has_verb_ = True
                        break
                    tt = tt.next0_
                if (not has_verb_):
                    res2 = InstrToken._new1409(
                        t0, dt.end_token, ILTypes.TYP,
                        Utils.ifNotNull(dt.full_value, dt.value))
                    if (res2.value == "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ" or res2.value == "ДОДАТКОВА УГОДА"):
                        if (res2.begin_char > 500 and res2.newlines_before_count > 1):
                            res2.typ = ILTypes.APPENDIX
                    return res2
            if (dt.typ == DecreeToken.ItemType.NUMBER):
                if (t != t0):
                    break
                return InstrToken._new1409(t0, dt.end_token, ILTypes.REGNUMBER, dt.value)
            if (dt.typ == DecreeToken.ItemType.ORG):
                if (t != t0):
                    break
                return InstrToken._new1411(t0, dt.end_token, ILTypes.ORGANIZATION, dt.ref, dt.value)
            if (dt.typ == DecreeToken.ItemType.TERR):
                if (t != t0):
                    break
                re = InstrToken._new1411(t0, dt.end_token, ILTypes.GEO, dt.ref, dt.value)
                t1 = re.end_token.next0_
                if (t1 is not None and t1.isChar(',')):
                    t1 = t1.next0_
                # Extend the place over a trailing "КРЕМЛЬ" or
                # "ДОМ СОВЕТ..." (optionally followed by a geo referent).
                if (t1 is not None and t1.isValue("КРЕМЛЬ", None)):
                    re.end_token = t1
                elif ((t1 is not None and t1.isValue("ДОМ", "БУДИНОК") and t1.next0_ is not None)
                        and t1.next0_.isValue("СОВЕТ", "РАД")):
                    re.end_token = t1.next0_
                    if (t1.next0_.next0_ is not None
                            and (isinstance(t1.next0_.next0_.getReferent(), GeoReferent))):
                        re.end_token = t1.next0_.next0_
                return re
            if (dt.typ == DecreeToken.ItemType.OWNER):
                if (t != t0):
                    break
                if (dt.ref is not None and str(dt.ref.referent).startswith("агент")):
                    dt = (None)
                if (dt is not None):
                    res1 = InstrToken._new1411(t0, dt.end_token, ILTypes.PERSON, dt.ref, dt.value)
                    return InstrToken.__correctPerson(res1)
        if (BracketHelper.canBeStartOfSequence(t, False, False)):
            br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                # Skip the whole bracketed sequence.
                t1 = br.end_token
                t = t1
                continue
            if (t.next0_ is not None
                    and BracketHelper.canBeEndOfSequence(t.next0_, False, None, False)):
                t1 = t.next0_
                t = t1
                continue
        if (isinstance(t, TextToken)):
            if (t.isChar('_')):
                t1 = t
                continue
        r = t.getReferent()
        if (isinstance(r, DateReferent)):
            tt = t
            if (tt.next0_ is not None and tt.next0_.isCharOf(",;")):
                tt = tt.next0_
            if (not t.is_newline_before and not tt.is_newline_after):
                # Date in the middle of a line: just consume it.
                t1 = tt
                continue
            if (not has_word):
                return InstrToken._new1405(t, tt, ILTypes.DATE, t)
            if (t != t0):
                break
        has_word = True
        if (isinstance(r, InstrumentParticipant)):
            # A participant that wraps an organization, bank data or URI
            # referent is not treated as a person below.
            tt = (t).begin_token
            first_pass2990 = True
            while True:
                if first_pass2990:
                    first_pass2990 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None and (tt.end_char < t.end_char))):
                    break
                rr = tt.getReferent()
                if (rr is None):
                    continue
                if ((isinstance(rr, OrganizationReferent))
                        or (isinstance(rr, BankDataReferent))
                        or (isinstance(rr, UriReferent))):
                    r = (None)
                    break
        if ((isinstance(r, PersonReferent))
                or (isinstance(r, PersonPropertyReferent))
                or (isinstance(r, InstrumentParticipant))):
            if (t != t0):
                break
            res1 = InstrToken._new1405(t, t, ILTypes.PERSON, t)
            return InstrToken.__correctPerson(res1)
        if (isinstance(r, OrganizationReferent)):
            if (t != t0):
                break
            return InstrToken._new1405(t, t, ILTypes.ORGANIZATION, t)
        if (isinstance(r, DecreePartReferent)):
            dpr = Utils.asObjectOrNull(r, DecreePartReferent)
            if (dpr.appendix is not None):
                if (t.is_newline_before or is_start_of_line):
                    if (t.is_newline_after or t.whitespaces_before_count > 30):
                        return InstrToken._new1409(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
                    # Otherwise the rest of the line must consist of noun
                    # phrases only.
                    ok = True
                    tt = t.next0_
                    first_pass2991 = True
                    while True:
                        if first_pass2991:
                            first_pass2991 = False
                        else:
                            tt = tt.next0_
                        if (not (tt is not None)):
                            break
                        if (tt.is_newline_before):
                            break
                        npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
                        if (npt is not None):
                            tt = npt.end_token
                            continue
                        ok = False
                        break
                    if (ok):
                        return InstrToken._new1409(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
        if ((isinstance(r, DecreeReferent)) and (r).kind == DecreeKind.PUBLISHER and t == t0):
            res1 = InstrToken._new1406(t, t, ILTypes.APPROVED)
            tt = t.next0_
            first_pass2992 = True
            while True:
                if first_pass2992:
                    first_pass2992 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if (tt.isCharOf(",;")):
                    continue
                if ((isinstance(tt.getReferent(), DecreeReferent))
                        and (tt.getReferent()).kind == DecreeKind.PUBLISHER):
                    # Extend the element over each further publisher
                    # referent.  The original assigned ``t`` here, which left
                    # the end token unchanged and made this loop a no-op.
                    res1.end_token = tt
                else:
                    break
            return res1
        if (t.isValue("ЗА", None) and t.next0_ is not None and t.is_newline_before):
            rr = t.next0_.getReferent()
            if ((isinstance(rr, PersonReferent))
                    or (isinstance(rr, PersonPropertyReferent))
                    or (isinstance(rr, InstrumentParticipant))):
                if (t != t0):
                    break
                res1 = InstrToken._new1405(t, t.next0_, ILTypes.PERSON, t.next0_)
                t = t.next0_.next0_
                if ((isinstance(rr, InstrumentParticipant)) and t is not None):
                    r = t.getReferent()
                    if ((r) is not None):
                        if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent))):
                            res1.end_token = t
                            res1.ref = (t)
                return res1
        # Directive keywords.  The "break" statements inside this inner loop
        # leave only the keyword loop, not the token scan.
        ii = 0
        while ii < len(InstrToken._m_directives):
            if (t.isValue(InstrToken._m_directives[ii], None)):
                if (t.next0_ is not None and t.next0_.isValue("СЛЕДУЮЩЕЕ", "НАСТУПНЕ")):
                    if (t != t0):
                        break
                    t11 = t.next0_
                    ok = False
                    if (t11.next0_ is not None and t11.next0_.isCharOf(":.")
                            and t11.next0_.is_newline_after):
                        ok = True
                        t11 = t11.next0_
                    if (ok):
                        return InstrToken._new1409(
                            t, t11, ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                if (t.is_newline_after
                        or ((t.next0_ is not None and t.next0_.isChar(':')
                             and t.next0_.is_newline_after))):
                    if (t != t0):
                        break
                    if (not t.is_newline_before):
                        if ((InstrToken._m_directives_norm[ii] != "ПРИКАЗ"
                             and InstrToken._m_directives_norm[ii] != "ПОСТАНОВЛЕНИЕ"
                             and InstrToken._m_directives_norm[ii] != "НАКАЗ")
                                and InstrToken._m_directives_norm[ii] != "ПОСТАНОВУ"):
                            break
                    return InstrToken._new1409(
                        t, (t if t.is_newline_after else t.next0_),
                        ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                break
            ii += 1
        if (t.is_newline_before and t.chars.is_letter and t.length_char == 1):
            # A single letter at line start: try to match a directive word
            # letter by letter.
            for d in InstrToken._m_directives:
                t11 = MiscHelper.tryAttachWordByLetters(d, t, True)
                if (t11 is not None):
                    if (t11.next0_ is not None and t11.next0_.isChar(':')):
                        t11 = t11.next0_
                    return InstrToken._new1406(t, t11, ILTypes.DIRECTIVE)
        tte = ((t).begin_token if (isinstance(t, MetaToken)) else t)
        term = ((tte).term if isinstance(tte, TextToken) else None)
        if (is_start_of_line and not tte.chars.is_all_lower and t == t0):
            npt = NounPhraseHelper.tryParse(tte, NounPhraseParseAttr.NO, 0)
            if (npt is not None and ((term == "ПРИЛОЖЕНИЯ" or term == "ДОДАТКИ"))):
                # The original carried a disabled extra condition here
                # (next token is ':'); the plural form is always rejected.
                npt = (None)
            if (npt is not None and npt.morph.case_.is_nominative
                    and (isinstance(npt.end_token, TextToken))):
                term1 = (npt.end_token).term
                # NOTE(review): the last comparison uses ``term`` (first
                # token) while the others use ``term1`` - confirm intent.
                if (((term1 == "ПРИЛОЖЕНИЕ" or term1 == "ДОДАТОК" or term1 == "МНЕНИЕ")
                     or term1 == "ДУМКА" or term1 == "АКТ")
                        or term1 == "ФОРМА" or term == "ЗАЯВКА"):
                    # Step over an optional number or single letter after
                    # the heading word.
                    tt1 = npt.end_token.next0_
                    dt1 = DecreeToken.tryAttach(tt1, None, False)
                    if (dt1 is not None and dt1.typ == DecreeToken.ItemType.NUMBER):
                        tt1 = dt1.end_token.next0_
                    elif (isinstance(tt1, NumberToken)):
                        tt1 = tt1.next0_
                    elif ((isinstance(tt1, TextToken)) and tt1.length_char == 1
                          and tt1.chars.is_letter):
                        tt1 = tt1.next0_
                    ok = True
                    if (tt1 is None):
                        ok = False
                    elif (tt1.isValue("В", "У")):
                        ok = False
                    elif (tt1.isValue("К", None) and tt1.is_newline_before):
                        return InstrToken._new1409(t, t, ILTypes.APPENDIX, term1)
                    elif (not tt1.is_newline_before and InstrToken._checkEntered(tt1) is not None):
                        ok = False
                    elif (tt1 == t.next0_
                          and ((tt1.isChar(':')
                                or ((tt1.isValue("НА", None) and term1 != "ЗАЯВКА"))))):
                        ok = False
                    if (ok):
                        br = BracketHelper.tryParse(tt1, BracketParseAttr.NO, 100)
                        if (br is not None):
                            tt1 = br.end_token.next0_
                            if (br.end_token.next0_ is None
                                    or not br.end_token.is_newline_after
                                    or br.end_token.next0_.isCharOf(";,")):
                                ok = False
                            if (tt1 is not None and tt1.isValue("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                ok = False
                    if (prev is not None and prev.typ == ILTypes.APPENDIX):
                        ok = False
                    if (ok):
                        # Scan up to 300 tokens back for a table control
                        # character other than chr(0x1F); finding one rejects
                        # the heading unless it is chr(0x1E) directly before.
                        cou = 0
                        ttt = tte.previous
                        while ttt is not None and (cou < 300):
                            if (ttt.is_table_control_char):
                                if (not ttt.isChar(chr(0x1F))):
                                    if (ttt == tte.previous and ttt.isChar(chr(0x1E))):
                                        pass
                                    else:
                                        ok = False
                                    break
                            ttt = ttt.previous
                            cou += 1
                    if (ok):
                        it1 = InstrToken1.parse(t, True, None, 0, None, False, 0, False)
                        if (it1 is not None):
                            if (it1.has_verb):
                                ok = False
                    if (ok and t.previous is not None):
                        # Reject when the nearest significant preceding
                        # token is ';' or ':'.
                        ttp = t.previous
                        first_pass2993 = True
                        while True:
                            if first_pass2993:
                                first_pass2993 = False
                            else:
                                ttp = ttp.previous
                            if (not (ttp is not None)):
                                break
                            if (ttp.is_table_control_char and not ttp.isChar(chr(0x1F))):
                                continue
                            if (BracketHelper.isBracket(ttp, False)
                                    and not BracketHelper.canBeEndOfSequence(ttp, False, None, False)):
                                continue
                            if (ttp.isCharOf(";:")):
                                ok = False
                            break
                    if ((ok and t.previous is not None and (t.newlines_before_count < 3))
                            and not t.is_newline_after):
                        # Inspect the start of up to two line starts found
                        # while walking backwards: a line that begins
                        # (after numbers) with an appendix word rejects
                        # this heading.
                        lines = 0
                        ttp = t.previous
                        first_pass2994 = True
                        while True:
                            if first_pass2994:
                                first_pass2994 = False
                            else:
                                ttp = ttp.previous
                            if (not (ttp is not None)):
                                break
                            if (not ttp.is_newline_before):
                                continue
                            while ttp is not None and (ttp.end_char < t.begin_char):
                                if (isinstance(ttp, NumberToken)):
                                    pass
                                elif ((isinstance(ttp, TextToken)) and ttp.length_char > 1):
                                    if (ttp.isValue("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                        ok = False
                                    break
                                else:
                                    break
                                ttp = ttp.next0_
                            lines += 1
                            if ((lines) > 1):
                                break
                    if (ok and ((term1 != "ПРИЛОЖЕНИЕ" and term1 != "ДОДАТОК"
                                 and term1 != "МНЕНИЕ"))):
                        if (t.newlines_before_count < 3):
                            ok = False
                    if (ok):
                        return InstrToken._new1409(t, t, ILTypes.APPENDIX, term1)
        # Two-word appendix headings; rejected when a verb follows on the
        # same line.
        app = False
        if ((((term == "ОСОБОЕ" or term == "ОСОБЛИВЕ")) and t.next0_ is not None
             and t.next0_.isValue("МНЕНИЕ", "ДУМКА"))
                and t == t0 and is_start_of_line):
            app = True
        if ((((term == "ДОПОЛНИТЕЛЬНОЕ" or term == "ДОДАТКОВА")) and t.next0_ is not None
             and t.next0_.isValue("СОГЛАШЕНИЕ", "УГОДА"))
                and t == t0 and is_start_of_line):
            app = True
        if (app):
            tt = t.next0_
            while tt is not None:
                if (tt.is_newline_before):
                    break
                elif (tt.getMorphClassInDictionary() == MorphClass.VERB):
                    app = False
                    break
                tt = tt.next0_
            if (app):
                return InstrToken._new1406(t, t.next0_, ILTypes.APPENDIX)
        if (not t.chars.is_all_lower and t == t0):
            tt = InstrToken._checkApproved(t)
            if (tt is not None):
                if (tt.next0_ is not None
                        and (isinstance(tt.next0_.getReferent(), DecreeReferent))):
                    return InstrToken._new1405(t, tt, ILTypes.APPROVED, tt.next0_.getReferent())
                dt1 = DecreeToken.tryAttach(tt.next0_, None, False)
                if (dt1 is not None and dt1.typ == DecreeToken.ItemType.TYP):
                    return InstrToken._new1406(t, tt, ILTypes.APPROVED)
        t1 = t
        is_start_of_line = False
    if (t1 is None):
        return None
    # Nothing specific was recognised: return the consumed span as UNDEFINED
    # and record whether it contains words and a pure verb.
    res = InstrToken._new1406(t00, t1, ILTypes.UNDEFINED)
    res.no_words = True
    t = t0
    first_pass2995 = True
    while True:
        if first_pass2995:
            first_pass2995 = False
        else:
            t = t.next0_
        if (not (t is not None and t.end_char <= t1.end_char)):
            break
        if (not ((isinstance(t, TextToken)))):
            if (isinstance(t, ReferentToken)):
                res.no_words = False
            continue
        if (not t.chars.is_letter):
            continue
        res.no_words = False
        if ((t).is_pure_verb):
            res.has_verb = True
    if (t0.isValue("ВОПРОС", "ПИТАННЯ") and t0.next0_ is not None
            and t0.next0_.isCharOf(":.")):
        res.typ = ILTypes.QUESTION
    return res
def __analizeListItems(chi: typing.List['FragToken'], ind: int) -> int:
    """Split the fragment ``chi[ind]`` into list-item / content children.

    Two cases are handled:

    * the fragment carries a multi-line change value - a CITATION child is
      built around it and filled with the parsed citation content;
    * otherwise the fragment text is split into lines, list items are
      detected and validated, and one child fragment per line is appended
      to ``chi[ind].children``.

    Args:
        chi: sibling fragments.
        ind: index in ``chi`` of the fragment to analyse.

    Returns:
        The number of elements of ``chi`` (starting at ``ind``) whose lines
        were taken into account (at least 1), or -1 when the fragment is out
        of range, of an unsupported kind, or contains no usable list.
    """
    if (ind >= len(chi)):
        return -1
    res = chi[ind]
    ki = res.kind
    # Only these fragment kinds are analysed.
    if (((ki == InstrumentKind.CHAPTER or ki == InstrumentKind.CLAUSE or ki == InstrumentKind.CONTENT) or ki == InstrumentKind.ITEM or ki == InstrumentKind.SUBITEM) or ki == InstrumentKind.CLAUSEPART or ki == InstrumentKind.INDENTION):
        pass
    else:
        return -1
    if (res.has_changes and res.multiline_changes_value is not None):
        # Case 1: wrap the multi-line change value in a CITATION child.
        ci = res.multiline_changes_value
        cit = FragToken._new1242(ci.begin_token, ci.end_token, InstrumentKind.CITATION)
        res.children.append(cit)
        # Extend the citation over adjacent bracket tokens and a trailing
        # ';' or '.' after the closing one.
        if (BracketHelper.isBracket(cit.begin_token.previous, True)):
            cit.begin_token = cit.begin_token.previous
        if (BracketHelper.isBracket(cit.end_token.next0_, True)):
            cit.end_token = cit.end_token.next0_
            if (cit.end_token.next0_ is not None and cit.end_token.next0_.isCharOf(";.")):
                cit.end_token = cit.end_token.next0_
        res.fillByContentChildren()
        # No-op branch kept from the original code.
        if (res.children[0].has_changes):
            pass
        # Work out which kind of structure the cited text represents.
        cit_kind = InstrumentKind.UNDEFINED
        if (isinstance(ci.tag, DecreeChangeReferent)):
            dcr = Utils.asObjectOrNull(ci.tag, DecreeChangeReferent)
            if (dcr.value is not None and len(dcr.value.new_items) > 0):
                # Use the part of the first new item before the first space.
                mnem = dcr.value.new_items[0]
                i = mnem.find(' ')
                if (((i)) > 0):
                    mnem = mnem[0:0 + i]
                cit_kind = PartToken._getInstrKindByTyp(
                    PartToken._getTypeByAttrName(mnem))
            elif (len(dcr.owners) > 0 and (isinstance(dcr.owners[0], DecreePartReferent)) and dcr.kind == DecreeChangeKind.NEW):
                pat = Utils.asObjectOrNull(dcr.owners[0], DecreePartReferent)
                # Among the owner's slots keep the type with the largest
                # non-zero rank value.
                min0_ = 0
                for s in pat.slots:
                    ty = PartToken._getTypeByAttrName(s.type_name)
                    if (ty == PartToken.ItemType.UNDEFINED):
                        continue
                    l_ = PartToken._getRank(ty)
                    if (l_ == 0):
                        continue
                    if (l_ > min0_ or min0_ == 0):
                        min0_ = l_
                        cit_kind = PartToken._getInstrKindByTyp(ty)
        sub = None
        if (cit_kind != InstrumentKind.UNDEFINED and cit_kind != InstrumentKind.APPENDIX):
            sub = FragToken(ci.begin_token, ci.end_token)
            wr = ContentAnalyzeWhapper()
            wr.analyze(sub, None, True, cit_kind)
            sub.kind = InstrumentKind.CONTENT
        else:
            sub = FragToken.createDocument(ci.begin_token, ci.end_char, cit_kind)
        if (sub is None or len(sub.children) == 0):
            pass
        elif ((sub.kind == InstrumentKind.CONTENT and len(sub.children) > 0 and sub.children[0].begin_token == sub.begin_token) and sub.children[len(sub.children) - 1].end_token == sub.end_token):
            # The children cover the CONTENT wrapper exactly: attach them
            # directly instead of the wrapper.
            cit.children.extend(sub.children)
        else:
            cit.children.append(sub)
        return 1
    # Case 2: line-based list detection.
    end_char = res.end_char
    if (res._itok is None):
        res._itok = InstrToken1.parse(res.begin_token, True, None, 0, None, False, res.end_char, False)
    lines = ListHelper.LineToken.parseList(res.begin_token, end_char, None)
    if (lines is None or (len(lines) < 1)):
        return -1
    ret = 1
    if (res.kind == InstrumentKind.CONTENT):
        # Append the lines of following CONTENT siblings as long as they
        # continue the list; EDITIONS and COMMENT siblings are stepped over.
        j = ind + 1
        while j < len(chi):
            if (chi[j].kind == InstrumentKind.CONTENT):
                lines2 = ListHelper.LineToken.parseList(
                    chi[j].begin_token, chi[j].end_char, lines[len(lines) - 1])
                if (lines2 is None or (len(lines2) < 1)):
                    break
                if (not lines2[0].is_list_item):
                    if ((len(lines2) > 1 and lines2[1].is_list_item and lines2[0].end_token.isCharOf(":")) and not lines2[0].begin_token.chars.is_capital_upper):
                        lines2[0].is_list_item = True
                    else:
                        break
                lines.extend(lines2)
                ret = ((j - ind) + 1)
            elif (chi[j].kind != InstrumentKind.EDITIONS and chi[j].kind != InstrumentKind.COMMENT):
                break
            j += 1
    if (len(lines) < 2):
        return -1
    # Two leading items that do not start at number 1 and are not followed
    # by a third item are both demoted.
    if ((len(lines) > 1 and lines[0].is_list_item and lines[1].is_list_item) and lines[0].number != 1):
        if (len(lines) == 2 or not lines[2].is_list_item):
            lines[1].is_list_item = False
            lines[0].is_list_item = lines[1].is_list_item
    # Validate each run of consecutive items (processed at its first line).
    i = 0
    first_pass3008 = True
    while True:
        # Emulated C-style "for": the increment also runs after "continue".
        if first_pass3008:
            first_pass3008 = False
        else:
            i += 1
        if (not (i < len(lines))):
            break
        if (lines[i].is_list_item):
            if (i > 0 and lines[i - 1].is_list_item):
                continue
            if (((i + 1) < len(lines)) and lines[i + 1].is_list_item):
                pass
            else:
                # A single isolated item is not a list.
                lines[i].is_list_item = False
                continue
            # Keep the run if a later item in it starts on a new line ...
            new_line = False
            j = (i + 1)
            while j < len(lines):
                if (not lines[j].is_list_item):
                    break
                elif (lines[j].is_newline_before):
                    new_line = True
                j += 1
            if (new_line):
                continue
            # ... or if the line before the run ends with ':'.
            if (i > 0 and lines[i - 1].end_token.isChar(':')):
                continue
            # Otherwise demote the whole run.
            j = i
            while j < len(lines):
                if (not lines[j].is_list_item):
                    break
                else:
                    lines[j].is_list_item = False
                j += 1
    if (len(lines) > 2):
        # A final non-item line ending with '.' after an item ending with
        # ';' is promoted when it is short relative to that item or starts
        # in lower case.
        last = lines[len(lines) - 1]
        last2 = lines[len(lines) - 2]
        if ((not last.is_list_item and last.end_token.isChar('.') and last2.is_list_item) and last2.end_token.isChar(';')):
            if ((last.length_char < (last2.length_char * 2)) or last.begin_token.chars.is_all_lower):
                last.is_list_item = True
    # Merge adjacent non-item lines, except when the second one ends with
    # ':' and is followed by an item.
    i = 0
    while i < (len(lines) - 1):
        if (not lines[i].is_list_item and not lines[i + 1].is_list_item):
            if (((i + 2) < len(lines)) and lines[i + 2].is_list_item and lines[i + 1].end_token.isChar(':')):
                pass
            else:
                lines[i].end_token = lines[i + 1].end_token
                del lines[i + 1]
                # Re-examine the same index after the deletion.
                i -= 1
        i += 1
    # Mixed numbered / unnumbered items: starting from an item numbered 1,
    # if every following line is an item, the numbered ones count up
    # consecutively and at least one unnumbered item exists, each unnumbered
    # item is moved into the ``tag`` list of the preceding numbered item.
    i = 0
    while i < (len(lines) - 1):
        if (lines[i].is_list_item):
            if (lines[i].number == 1):
                ok = True
                num = 1
                nonum = 0
                j = i + 1
                while j < len(lines):
                    if (not lines[j].is_list_item):
                        ok = False
                        break
                    elif (lines[j].number > 0):
                        num += 1
                        if (lines[j].number != num):
                            ok = False
                            break
                    else:
                        nonum += 1
                    j += 1
                if (not ok or nonum == 0 or (num < 2)):
                    break
                lt = lines[i]
                j = i + 1
                while j < len(lines):
                    if (lines[j].number > 0):
                        lt = lines[j]
                    else:
                        chli = Utils.asObjectOrNull(lt.tag, list)
                        if (chli is None):
                            chli = list()
                            lt.tag = chli
                        lt.end_token = lines[j].end_token
                        chli.append(lines[j])
                        del lines[j]
                        j -= 1
                    j += 1
        i += 1
    # At least two list items must remain.
    cou = 0
    for li in lines:
        if (li.is_list_item):
            cou += 1
    if (cou < 2):
        return -1
    i = 0
    first_pass3009 = True
    while True:
        if first_pass3009:
            first_pass3009 = False
        else:
            i += 1
        if (not (i < len(lines))):
            break
        if (lines[i].is_list_item):
            # Check that the run starting at i0 is numbered 1, 2, 3, ...
            i0 = i
            ok = True
            cou = 1
            while i < len(lines):
                if (not lines[i].is_list_item):
                    break
                elif (lines[i].number != cou):
                    ok = False
                i += 1
                cou += 1
            if (not ok):
                # Inconsistent numbering: clear the numbers of the run.
                i = i0
                while i < len(lines):
                    if (not lines[i].is_list_item):
                        break
                    else:
                        lines[i].number = 0
                    i += 1
            # If every item after the first starts with the same token text
            # (a common marker) but the first does not, look for that marker
            # inside the first line and split the line there.
            if (cou > 3 and lines[i0].begin_token.getSourceText() != lines[i0 + 1].begin_token.getSourceText() and lines[i0 + 1].begin_token.getSourceText() == lines[i0 + 2].begin_token.getSourceText()):
                pref = lines[i0 + 1].begin_token.getSourceText()
                ok = True
                j = i0 + 2
                while j < i:
                    if (pref != lines[j].begin_token.getSourceText()):
                        ok = False
                        break
                    j += 1
                if (not ok):
                    continue
                tt = None
                ok = False
                tt = lines[i0].end_token.previous
                while tt is not None and tt != lines[i0].begin_token:
                    if (tt.getSourceText() == pref):
                        ok = True
                        break
                    tt = tt.previous
                if (ok):
                    li0 = ListHelper.LineToken(lines[i0].begin_token, tt.previous)
                    lines[i0].begin_token = tt
                    lines.insert(i0, li0)
                    # Account for the inserted line.
                    i += 1
    # Emit one child per line: LISTITEM for items, LISTHEAD for a non-item
    # line ending with ':', CONTENT otherwise.
    for li in lines:
        li.correctBeginToken()
        ch = FragToken._new1259(li.begin_token, li.end_token, (InstrumentKind.LISTITEM if li.is_list_item else InstrumentKind.CONTENT), li.number)
        if (ch.kind == InstrumentKind.CONTENT and ch.end_token.isChar(':')):
            ch.kind = InstrumentKind.LISTHEAD
        res.children.append(ch)
        chli = Utils.asObjectOrNull(li.tag, list)
        if (chli is not None):
            # Nested unnumbered items collected above become LISTITEM
            # grandchildren; any text before the first of them becomes a
            # leading CONTENT fragment.
            for lt in chli:
                ch.children.append(
                    FragToken._new1242(lt.begin_token, lt.end_token, InstrumentKind.LISTITEM))
            if (ch.begin_char < ch.children[0].begin_char):
                ch.children.insert(
                    0, FragToken._new1242(ch.begin_token, ch.children[0].begin_token.previous, InstrumentKind.CONTENT))
    return ret