def __can_be_geo_after(tt: 'Token') -> bool:
    """Report whether something geographic seems to start at ``tt``.

    Commas and bracket tokens in front are skipped first.  The answer is
    ``True`` when the first remaining token

    * already carries a ``GeoReferent``, or
    * starts a list of at least two territory items of which exactly one
      of the first two has a ``termin_item``, or
    * is accepted by ``CityAttachHelper.check_city_after``, or
    * is accepted by ``TerrAttachHelper.try_attach_stateusaterritory``.

    Returns ``False`` when the chain runs out or none of the checks match.
    """
    cursor = tt
    # Step over leading punctuation: commas and any bracket character.
    while cursor is not None:
        if not (cursor.is_comma or BracketHelper.is_bracket(cursor, True)):
            break
        cursor = cursor.next0_
    if cursor is None:
        return False

    if isinstance(cursor.get_referent(), GeoReferent):
        return True

    terr_items = TerrItemToken.try_parse_list(cursor, None, 2)
    if terr_items is not None and len(terr_items) > 1:
        first_has_termin = terr_items[0].termin_item is not None
        second_has_termin = terr_items[1].termin_item is not None
        # Exactly one of the two leading items must be a termin item.
        if first_has_termin != second_has_termin:
            return True

    if CityAttachHelper.check_city_after(cursor):
        return True
    return TerrAttachHelper.try_attach_stateusaterritory(cursor) is not None
def __try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
    """Try to recognise one weapon-description item starting at token ``t``.

    The checks below run in a fixed order and the first one that succeeds
    returns immediately:

    1. an item wrapped in brackets (recursive call on ``t.next0_``);
    2. a term from ``WeaponItemToken.M_ONTOLOGY`` (NOUN / BRAND / NAME / MODEL);
    3. a number introduced by a number prefix (NUMBER);
    4. a short (< 4 chars) all-upper-case word followed by a number (MODEL),
       plus the special abbreviation "СП" (NOUN);
    5. a not-all-lower-case word longer than 2 chars in a suitable context
       (NAME, possibly re-typed to BRAND or MODEL);
    6. the keyword "МАРКА" (BRAND);
    7. the keyword "КАЛИБР" or a number with a caliber unit/keyword (CALIBER);
    8. the keyword "ПРОИЗВОДСТВО" followed by an organization or geo
       referent (DEVELOPER).

    Args:
        t: first token to analyse; ``None`` yields ``None``.
        prev: previously parsed item, or ``None``; only its ``typ`` is read.
        after_conj: not inspected here, only forwarded to recursive calls.
        attach_high: not inspected here, only forwarded to the bracket
            recursion.

    Returns:
        A ``WeaponItemToken`` or ``None`` when nothing matched.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # line structure was lost; confirm against the upstream file.
    if (t is None):
        return None
    # 1. Bracketed item: parse what follows the bracket and widen the span.
    if (BracketHelper.is_bracket(t, True)):
        wit = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, attach_high)
        if (wit is not None):
            if (wit.end_token.next0_ is None):
                wit.begin_token = t
                return wit
            if (BracketHelper.is_bracket(wit.end_token.next0_, True)):
                # Include the closing bracket as well.
                wit.begin_token = t
                wit.end_token = wit.end_token.next0_
                return wit
            # Otherwise fall through and keep analysing ``t`` itself.
    # 2. Ontology lookup.
    tok = WeaponItemToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
    if (tok is not None):
        res = WeaponItemToken(t, tok.end_token)
        res.typ = (Utils.valToEnum(tok.termin.tag, WeaponItemToken.Typs))
        if (res.typ == WeaponItemToken.Typs.NOUN):
            res.value = tok.termin.canonic_text
            # A non-empty tag2 on a noun termin marks the item as doubtful.
            if (tok.termin.tag2 is not None):
                res.is_doubt = True
            # Extend the noun to the right with embedded brands and
            # dictionary adjectives, as long as tokens stay close together.
            tt = res.end_token.next0_
            first_pass3426 = True
            while True:
                if first_pass3426:
                    first_pass3426 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if (tt.whitespaces_before_count > 2):
                    break
                wit = WeaponItemToken.__try_parse(tt, None, False, False)
                if (wit is not None):
                    if (wit.typ == WeaponItemToken.Typs.BRAND):
                        res.__inner_tokens.append(wit)
                        tt = wit.end_token
                        res.end_token = tt
                        continue
                    # Any other recognised item ends the noun.
                    break
                if (not (isinstance(tt, TextToken))):
                    break
                mc = tt.get_morph_class_in_dictionary()
                if (mc == MorphClass.ADJECTIVE):
                    # Build alt_value as "<collected adjectives> <noun>":
                    # strip the trailing noun, append the adjective, re-append
                    # the noun.
                    if (res.alt_value is None):
                        res.alt_value = res.value
                    if (res.alt_value.endswith(res.value)):
                        res.alt_value = res.alt_value[0:0+len(res.alt_value) - len(res.value)]
                    res.alt_value = "{0}{1} {2}".format(res.alt_value, tt.term, res.value)
                    res.end_token = tt
                    continue
                break
            return res
        if (res.typ == WeaponItemToken.Typs.BRAND or res.typ == WeaponItemToken.Typs.NAME):
            res.value = tok.termin.canonic_text
            return res
        if (res.typ == WeaponItemToken.Typs.MODEL):
            res.value = tok.termin.canonic_text
            # When tag2 is a list, each entry becomes an inner token over the
            # same span, typed by the entry's own tag.
            if (isinstance(tok.termin.tag2, list)):
                li = Utils.asObjectOrNull(tok.termin.tag2, list)
                for to in li:
                    wit = WeaponItemToken._new2758(t, tok.end_token, Utils.valToEnum(to.tag, WeaponItemToken.Typs), to.canonic_text, tok.begin_token == tok.end_token)
                    res.__inner_tokens.append(wit)
                    if (to.additional_vars is not None and len(to.additional_vars) > 0):
                        wit.alt_value = to.additional_vars[0].canonic_text
            res.__correct_model()
            return res
        # Other ontology types fall through to the generic checks.
    # 3. Number introduced by a number prefix.
    nnn = MiscHelper.check_number_prefix(t)
    if (nnn is not None):
        tit = TransItemToken._attach_number(nnn, True)
        if (tit is not None):
            res = WeaponItemToken._new2759(t, tit.end_token, WeaponItemToken.Typs.NUMBER)
            res.value = tit.value
            res.alt_value = tit.alt_value
            return res
    # 4. Short all-upper-case word (fewer than 4 chars).
    if (((isinstance(t, TextToken)) and t.chars.is_letter and t.chars.is_all_upper) and (t.length_char < 4)):
        # "XX-<number>" / "XX.<number>": the span covers the word and the separator.
        if ((t.next0_ is not None and ((t.next0_.is_hiphen or t.next0_.is_char('.'))) and (t.next0_.whitespaces_after_count < 2)) and (isinstance(t.next0_.next0_, NumberToken))):
            res = WeaponItemToken._new2760(t, t.next0_, WeaponItemToken.Typs.MODEL, True)
            res.value = t.term
            res.__correct_model()
            return res
        # "XX<number>" written without whitespace.
        if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after):
            res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.MODEL, True)
            res.value = t.term
            res.__correct_model()
            return res
        # "СП" followed by a model or brand is reported as a noun with fixed values.
        if (t.term == "СП" and (t.whitespaces_after_count < 3) and (isinstance(t.next0_, TextToken))):
            pp = WeaponItemToken.__try_parse(t.next0_, None, False, False)
            if (pp is not None and ((pp.typ == WeaponItemToken.Typs.MODEL or pp.typ == WeaponItemToken.Typs.BRAND))):
                res = WeaponItemToken._new2759(t, t, WeaponItemToken.Typs.NOUN)
                res.value = "ПИСТОЛЕТ"
                res.alt_value = "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
                return res
    # 5. Word that is not all lower case: accepted only right after a
    #    noun/model/brand item, or (with no previous item) after a comma/"and".
    if (((isinstance(t, TextToken)) and t.chars.is_letter and not t.chars.is_all_lower) and t.length_char > 2):
        ok = False
        if (prev is not None and ((prev.typ == WeaponItemToken.Typs.NOUN or prev.typ == WeaponItemToken.Typs.MODEL or prev.typ == WeaponItemToken.Typs.BRAND))):
            ok = True
        elif (prev is None and t.previous is not None and t.previous.is_comma_and):
            ok = True
        if (ok):
            res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.NAME, True)
            res.value = t.term
            # Join a hyphenated second word written with the same character case.
            if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, TextToken))) and t.next0_.next0_.chars == t.chars):
                res.value = "{0}-{1}".format(res.value, t.next0_.next0_.term)
                res.end_token = t.next0_.next0_
            if (prev is not None and prev.typ == WeaponItemToken.Typs.NOUN):
                res.typ = WeaponItemToken.Typs.BRAND
            # A following number (after a hyphen or glued on) turns it into a model.
            if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen and (isinstance(res.end_token.next0_.next0_, NumberToken))):
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correct_model()
            elif (not res.end_token.is_whitespace_after and (isinstance(res.end_token.next0_, NumberToken))):
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correct_model()
            return res
    # 6. Keyword "МАРКА" followed by a brand.
    if (t.is_value("МАРКА", None)):
        res = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, False)
        if (res is not None and res.typ == WeaponItemToken.Typs.BRAND):
            res.begin_token = t
            return res
        if (BracketHelper.can_be_start_of_sequence(t.next0_, True, False)):
            br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
            if (br is not None):
                return WeaponItemToken._new2764(t, br.end_token, WeaponItemToken.Typs.BRAND, MiscHelper.get_text_value(br.begin_token, br.end_token, GetTextAttr.NO))
        if (((isinstance(t, TextToken)) and (isinstance(t.next0_, TextToken)) and t.next0_.length_char > 1) and not t.next0_.chars.is_all_lower):
            # NOTE(review): the stored value is t.term (the keyword token
            # itself), not the term of t.next0_ — confirm this is intended.
            return WeaponItemToken._new2764(t, t.next0_, WeaponItemToken.Typs.BRAND, t.term)
    # 7a. Keyword "КАЛИБР" with an optional '-' or ':' before the number.
    if (t.is_value("КАЛИБР", "КАЛІБР")):
        tt1 = t.next0_
        if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))):
            tt1 = tt1.next0_
        num = NumbersWithUnitToken.try_parse(tt1, None, False, False, False, False)
        if (num is not None and num.single_val is not None):
            return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
    # 7b. Bare number: a caliber if its single unit is "мм" or the keyword follows.
    if (isinstance(t, NumberToken)):
        num = NumbersWithUnitToken.try_parse(t, None, False, False, False, False)
        if (num is not None and num.single_val is not None):
            if (len(num.units) == 1 and num.units[0].unit is not None and num.units[0].unit.name_cyr == "мм"):
                return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
            if (num.end_token.next0_ is not None and num.end_token.next0_.is_value("КАЛИБР", "КАЛІБР")):
                return WeaponItemToken._new2764(t, num.end_token.next0_, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
    # 8. Keyword "ПРОИЗВОДСТВО" followed by an organization or geo referent.
    if (t.is_value("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО")):
        tt1 = t.next0_
        if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))):
            tt1 = tt1.next0_
        if (isinstance(tt1, ReferentToken)):
            if ((isinstance(tt1.get_referent(), OrganizationReferent)) or (isinstance(tt1.get_referent(), GeoReferent))):
                return WeaponItemToken._new2769(t, tt1, WeaponItemToken.Typs.DEVELOPER, tt1.get_referent())
    return None
def try_attach(t : 'Token', p1 : 'InstrumentParticipantReferent'=None, p2 : 'InstrumentParticipantReferent'=None, is_contract : bool=False) -> 'ParticipantToken':
    """Try to recognise a participant mention starting at token ``t``.

    The function has two stages.

    *Stage 1* runs only when neither ``p1`` nor ``p2`` is given and
    ``is_contract`` is true.  It looks at the referent carried by ``t`` and
    tries, in order:

    a. a list of same-typed referents followed by a plural "СТОРОНА" noun
       phrase (result kind ``NAMEDASPARTS``, ``parts`` = the referents);
    b. for an organization/person referent, a naming construct inside the
       referent token itself (result kind ``NAMEDAS``);
    c. the same kind of naming construct in the tokens after ``t``;
    d. an ontology term followed by a person/organization referent.

    *Stage 2* scans forward from ``t`` looking for a participant type word
    (from ``ParticipantToken.M_ONTOLOGY``, from the ``typ`` of ``p1``/``p2``,
    or from heuristics that apply after a "named" marker).

    Args:
        t: first token; ``None`` yields ``None``.
        p1: already known participant, or ``None``; only ``p1.typ`` is read.
        p2: second known participant, or ``None``; only ``p2.typ`` is read.
        is_contract: enables stage 1 (together with ``p1``/``p2`` being
            ``None``) and one capitalised-word heuristic in stage 2.

    Returns:
        A ``ParticipantToken`` or ``None``.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # line structure was lost; confirm ambiguous spots against upstream.
    if (t is None):
        return None
    tt = t
    br = False
    # ---------------- Stage 1: referent-based recognition ----------------
    if (p1 is None and p2 is None and is_contract):
        r1 = t.get_referent()
        # (a) "R1, R2 [, R3 ...] ... <plural СТОРОНА>"
        if ((r1 is not None and t.next0_ is not None and t.next0_.is_comma_and) and (isinstance(t.next0_.next0_, ReferentToken))):
            r2 = t.next0_.next0_.get_referent()
            if (r1.type_name == r2.type_name):
                ttt = t.next0_.next0_.next0_
                refs = list()
                refs.append(r1)
                refs.append(r2)
                # Collect further referents of the same type joined by comma/"and".
                first_pass3282 = True
                while True:
                    if first_pass3282:
                        first_pass3282 = False
                    else:
                        ttt = ttt.next0_
                    if (not (ttt is not None)):
                        break
                    if ((ttt.is_comma_and and ttt.next0_ is not None and ttt.next0_.get_referent() is not None) and ttt.next0_.get_referent().type_name == r1.type_name):
                        ttt = ttt.next0_
                        if (not ttt.get_referent() in refs):
                            refs.append(ttt.get_referent())
                        continue
                    break
                # Skip filler words, then require a non-singular "СТОРОНА" noun phrase.
                first_pass3283 = True
                while True:
                    if first_pass3283:
                        first_pass3283 = False
                    else:
                        ttt = ttt.next0_
                    if (not (ttt is not None)):
                        break
                    if (ttt.is_comma or ttt.morph.class0_.is_preposition):
                        continue
                    if ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)):
                        continue
                    if (ttt.is_value("ДОГОВАРИВАТЬСЯ", None)):
                        continue
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None and npt.noun.is_value("СТОРОНА", None) and npt.morph.number != MorphNumber.SINGULAR):
                        re = ParticipantToken._new1573(t, npt.end_token, ParticipantToken.Kinds.NAMEDASPARTS)
                        re.parts = refs
                        return re
                    break
        if ((isinstance(r1, OrganizationReferent)) or (isinstance(r1, PersonReferent))):
            has_br = False
            has_named = False
            # Persons preceded by these words are rejected outright.
            # NOTE(review): the elif is attached to the inner "ЛИЦО" test here;
            # the collapsed source would also allow attaching it to the outer
            # isinstance test — confirm.
            if (isinstance(r1, PersonReferent)):
                if (t.previous is not None and t.previous.is_value("ЛИЦО", None)):
                    return None
                elif (t.previous is not None and ((t.previous.is_value("ВЫДАВАТЬ", None) or t.previous.is_value("ВЫДАТЬ", None)))):
                    return None
            # (b) Look inside the referent token for "( <noun phrase> )" or
            #     "<named marker> <noun phrase>".
            ttt = t.begin_token
            while ttt is not None and (ttt.end_char < t.end_char):
                if (ttt.is_char('(')):
                    has_br = True
                elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)):
                    has_named = True
                elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')):
                    pass
                elif (isinstance(ttt, ReferentToken)):
                    pass
                elif (has_br or has_named):
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                    if (npt is None):
                        break
                    if (has_br):
                        # In brackets the noun phrase must be closed by ')'.
                        if (npt.end_token.next0_ is None or not npt.end_token.next0_.is_char(')')):
                            break
                    if (not has_named):
                        # Without a "named" marker the word must be a known ontology term.
                        if (ParticipantToken.M_ONTOLOGY.try_parse(ttt, TerminParseAttr.NO) is None):
                            break
                    re = ParticipantToken._new1573(t, t, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                    re.parts = list()
                    re.parts.append(r1)
                    return re
                ttt = ttt.next0_
            # (c) Look at the tokens following the referent.
            has_br = False
            has_named = False
            end_side = None
            brr = None
            add_refs = None
            ttt = t.next0_
            first_pass3284 = True
            while True:
                if first_pass3284:
                    first_pass3284 = False
                else:
                    ttt = ttt.next0_
                if (not (ttt is not None)):
                    break
                # "<number> СТОРОНЫ": remember where the side designation ends.
                if ((isinstance(ttt, NumberToken)) and (isinstance(ttt.next0_, TextToken)) and ttt.next0_.term == "СТОРОНЫ"):
                    ttt = ttt.next0_
                    end_side = ttt
                    if (ttt.next0_ is not None and ttt.next0_.is_comma):
                        ttt = ttt.next0_
                    if (ttt.next0_ is not None and ttt.next0_.is_and):
                        break
                # Forget the bracket sequence once we have walked past its end.
                if (brr is not None and ttt.begin_char > brr.end_char):
                    brr = (None)
                if (BracketHelper.can_be_start_of_sequence(ttt, False, False)):
                    brr = BracketHelper.try_parse(ttt, BracketParseAttr.NO, 100)
                    # Short parenthesised fragments (< 7 chars) are skipped entirely.
                    if (brr is not None and (brr.length_char < 7) and ttt.is_char('(')):
                        ttt = brr.end_token
                        brr = (None)
                        continue
                elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)):
                    has_named = True
                elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')):
                    pass
                elif (brr is not None or has_named):
                    if (BracketHelper.can_be_start_of_sequence(ttt, True, False)):
                        ttt = ttt.next0_
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                    typ22 = None
                    if (npt is not None):
                        ttt = npt.end_token
                        if (npt.end_token.is_value("ДОГОВОР", None)):
                            continue
                    else:
                        # No noun phrase: try an ontology term inside a meta
                        # token, then an adjective after a "named" marker.
                        ttok = None
                        if (isinstance(ttt, MetaToken)):
                            ttok = ParticipantToken.M_ONTOLOGY.try_parse(ttt.begin_token, TerminParseAttr.NO)
                        if (ttok is not None):
                            typ22 = ttok.termin.canonic_text
                        elif (has_named and ttt.morph.class0_.is_adjective):
                            typ22 = ttt.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
                        elif (brr is not None):
                            continue
                        else:
                            break
                    if (BracketHelper.can_be_end_of_sequence(ttt.next0_, True, None, False)):
                        ttt = ttt.next0_
                    if (brr is not None):
                        if (ttt.next0_ is None):
                            ttt = brr.end_token
                            continue
                        ttt = ttt.next0_
                    if (not has_named and typ22 is None):
                        # npt is not None here: the branch that leaves it None
                        # either sets typ22 or exits via continue/break.
                        if (ParticipantToken.M_ONTOLOGY.try_parse(npt.begin_token, TerminParseAttr.NO) is None):
                            break
                    re = ParticipantToken._new1573(t, ttt, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = (Utils.ifNotNull(typ22, npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)))
                    re.parts = list()
                    re.parts.append(r1)
                    return re
                elif ((ttt.is_value("ЗАРЕГИСТРИРОВАННЫЙ", None) or ttt.is_value("КАЧЕСТВО", None) or ttt.is_value("ПРОЖИВАЮЩИЙ", None)) or ttt.is_value("ЗАРЕГ", None)):
                    pass
                elif (ttt.get_referent() == r1):
                    pass
                elif ((isinstance(ttt.get_referent(), PersonIdentityReferent)) or (isinstance(ttt.get_referent(), AddressReferent))):
                    # Identity documents and addresses are collected as extra parts.
                    if (add_refs is None):
                        add_refs = list()
                    add_refs.append(ttt.get_referent())
                else:
                    # Tolerate a limited set of descriptive material; anything
                    # else stops the scan.
                    prr = ttt.kit.process_referent("PERSONPROPERTY", ttt)
                    if (prr is not None):
                        ttt = prr.end_token
                        continue
                    if (isinstance(ttt.get_referent(), GeoReferent)):
                        continue
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None):
                        if ((npt.noun.is_value("МЕСТО", None) or npt.noun.is_value("ЖИТЕЛЬСТВО", None) or npt.noun.is_value("ПРЕДПРИНИМАТЕЛЬ", None)) or npt.noun.is_value("ПОЛ", None) or npt.noun.is_value("РОЖДЕНИЕ", None)):
                            ttt = npt.end_token
                            continue
                    if (ttt.is_newline_before):
                        break
                    if (ttt.length_char < 3):
                        continue
                    mc = ttt.get_morph_class_in_dictionary()
                    if (mc.is_adverb or mc.is_adjective):
                        continue
                    if (ttt.chars.is_all_upper):
                        continue
                    break
            # No explicit type found: still report the referent when a side
            # designation was seen, or when extra referents were collected and
            # the mention is preceded by "and".
            if (end_side is not None or ((add_refs is not None and t.previous is not None and t.previous.is_and))):
                re = ParticipantToken._new1573(t, Utils.ifNotNull(end_side, t), ParticipantToken.Kinds.NAMEDAS)
                re.typ = (None)
                re.parts = list()
                re.parts.append(r1)
                if (add_refs is not None):
                    re.parts.extend(add_refs)
                return re
        # (d) "<ontology term> [-|:] <person/organization referent>"
        # NOTE(review): placed inside the stage-1 condition here; the collapsed
        # source does not show whether it belongs at function level — confirm.
        too = ParticipantToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
        if (too is not None):
            if ((isinstance(t.previous, TextToken)) and t.previous.is_value("ЛИЦО", None)):
                too = (None)
        if (too is not None and too.termin.tag is not None and too.termin.canonic_text != "СТОРОНА"):
            tt1 = too.end_token.next0_
            if (tt1 is not None):
                if (tt1.is_hiphen or tt1.is_char(':')):
                    tt1 = tt1.next0_
            if (isinstance(tt1, ReferentToken)):
                r1 = tt1.get_referent()
                if ((isinstance(r1, PersonReferent)) or (isinstance(r1, OrganizationReferent))):
                    re = ParticipantToken._new1573(t, tt1, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = too.termin.canonic_text
                    re.parts = list()
                    re.parts.append(r1)
                    return re
    # ---------------- Stage 2: type-word recognition ----------------
    add_typ1 = (None if p1 is None else p1.typ)
    add_typ2 = (None if p2 is None else p2.typ)
    # Remember that we entered through an opening bracket so the matching
    # closing bracket can be included at the end.
    if (BracketHelper.can_be_start_of_sequence(tt, False, False) and tt.next0_ is not None):
        br = True
        tt = tt.next0_
    # Multi-word known types (other than those starting with "СТОРОНА") get
    # their own Termin so they can be matched as a phrase.
    term1 = None
    term2 = None
    if (add_typ1 is not None and add_typ1.find(' ') > 0 and not add_typ1.startswith("СТОРОНА")):
        term1 = Termin(add_typ1)
    if (add_typ2 is not None and add_typ2.find(' ') > 0 and not add_typ2.startswith("СТОРОНА")):
        term2 = Termin(add_typ2)
    named = False  # set once an ontology term without a tag has been seen
    typ_ = None    # recognised participant type
    t1 = None      # last token of the recognised type
    t0 = tt
    first_pass3285 = True
    while True:
        if first_pass3285:
            first_pass3285 = False
        else:
            tt = tt.next0_
        if (not (tt is not None)):
            break
        if (tt.morph.class0_.is_preposition and typ_ is not None):
            continue
        if (tt.is_char_of("(:)") or tt.is_hiphen):
            continue
        if (tt.is_table_control_char):
            break
        if (tt.is_newline_before and tt != t0):
            # On a new line, stop at a number or at a repetition of the previous word.
            if (isinstance(tt, NumberToken)):
                break
            if ((isinstance(tt, TextToken)) and (isinstance(tt.previous, TextToken))):
                if (tt.previous.is_value(tt.term, None)):
                    break
        if (BracketHelper.is_bracket(tt, False)):
            continue
        tok = (ParticipantToken.M_ONTOLOGY.try_parse(tt, TerminParseAttr.NO) if ParticipantToken.M_ONTOLOGY is not None else None)
        if (tok is not None and (isinstance(tt.previous, TextToken))):
            if (tt.previous.is_value("ЛИЦО", None)):
                return None
        if (tok is None):
            # Not an ontology term: compare against the known participant
            # types; at most one spelling error is tolerated.
            if (add_typ1 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ1, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ1, None)))))):
                if (typ_ is not None):
                    if (not ParticipantToken.__is_types_equal(add_typ1, typ_)):
                        break
                typ_ = add_typ1
                t1 = tt
                continue
            if (add_typ2 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ2, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ2, None)))))):
                if (typ_ is not None):
                    if (not ParticipantToken.__is_types_equal(add_typ2, typ_)):
                        break
                typ_ = add_typ2
                t1 = tt
                continue
            if (tt.chars.is_letter):
                if (term1 is not None):
                    tok1 = term1.try_parse(tt, TerminParseAttr.NO)
                    if (tok1 is not None):
                        if (typ_ is not None):
                            if (not ParticipantToken.__is_types_equal(add_typ1, typ_)):
                                break
                        typ_ = add_typ1
                        tt = tok1.end_token
                        t1 = tt
                        continue
                if (term2 is not None):
                    tok2 = term2.try_parse(tt, TerminParseAttr.NO)
                    if (tok2 is not None):
                        if (typ_ is not None):
                            if (not ParticipantToken.__is_types_equal(add_typ2, typ_)):
                                break
                        typ_ = add_typ2
                        tt = tok2.end_token
                        t1 = tt
                        continue
                # After a "named" marker, a noun that is not all lower case
                # (or that follows a bracket) and is not a decree keyword is
                # taken as the type.
                if (named and tt.get_morph_class_in_dictionary().is_noun):
                    if (not tt.chars.is_all_lower or BracketHelper.is_bracket(tt.previous, True)):
                        if (DecreeToken.is_keyword(tt, False) is None):
                            val = tt.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                            if (typ_ is not None):
                                if (not ParticipantToken.__is_types_equal(typ_, val)):
                                    break
                            typ_ = val
                            t1 = tt
                            continue
            # Contract-only heuristic: a capitalised Cyrillic word after a
            # "named" marker, when no type has been found yet.
            if (named and typ_ is None and is_contract):
                if ((isinstance(tt, TextToken)) and tt.chars.is_cyrillic_letter and tt.chars.is_capital_upper):
                    dc = tt.get_morph_class_in_dictionary()
                    if (dc.is_undefined or dc.is_noun):
                        dt = DecreeToken.try_attach(tt, None, False)
                        ok = True
                        if (dt is not None):
                            ok = False
                        elif (tt.is_value("СТОРОНА", None)):
                            ok = False
                        if (ok):
                            typ_ = tt.lemma
                            t1 = tt
                            continue
                    if (dc.is_adjective):
                        npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                        if (npt is not None and len(npt.adjectives) > 0 and npt.noun.get_morph_class_in_dictionary().is_noun):
                            typ_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                            t1 = npt.end_token
                            continue
            if (tt == t):
                break
            if ((isinstance(tt, NumberToken)) or tt.is_char('.')):
                break
            if (tt.length_char < 4):
                # Short tokens are skipped only once a type has been found.
                if (typ_ is not None):
                    continue
            break
        # From here on tok is not None.
        if (tok.termin.tag is None):
            # Untagged ontology term: acts as the "named" marker; keep scanning.
            named = True
        else:
            if (typ_ is not None):
                break
            if (tok.termin.canonic_text == "СТОРОНА"):
                # "СТОРОНА [-] <number>" on the same line.
                tt1 = tt.next0_
                if (tt1 is not None and tt1.is_hiphen):
                    tt1 = tt1.next0_
                if (not (isinstance(tt1, NumberToken))):
                    break
                if (tt1.is_newline_before):
                    break
                typ_ = "{0} {1}".format(tok.termin.canonic_text, tt1.value)
                t1 = tt1
            else:
                typ_ = tok.termin.canonic_text
                t1 = tok.end_token
            break
        tt = tok.end_token
    if (typ_ is None):
        return None
    # A bare type word found away from ``t`` must agree with a known participant type.
    if (not named and t1 != t and not typ_.startswith("СТОРОНА")):
        if (not ParticipantToken.__is_types_equal(typ_, add_typ1) and not ParticipantToken.__is_types_equal(typ_, add_typ2)):
            return None
    # Include surrounding brackets in the resulting span.
    if (BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)):
        t1 = t1.next0_
        if (not t.is_whitespace_before and BracketHelper.can_be_start_of_sequence(t.previous, False, False)):
            t = t.previous
    elif (BracketHelper.can_be_start_of_sequence(t, False, False) and BracketHelper.can_be_end_of_sequence(t1.next0_, True, t, True)):
        t1 = t1.next0_
    if (br and t1.next0_ is not None and BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)):
        t1 = t1.next0_
    res = ParticipantToken._new1578(t, t1, (ParticipantToken.Kinds.NAMEDAS if named else ParticipantToken.Kinds.PURE), typ_)
    # A leading ':' is not part of the mention.
    if (t.is_char(':')):
        res.begin_token = t.next0_
    return res
def __try_attach(t: 'Token', prev: typing.List['DateItemToken'], detail_regime: bool) -> 'DateItemToken':
    """Try to recognise one date/time item starting at token ``t``.

    Two main paths exist:

    * *Numeric path* – ``t`` is a number, or a number wrapped in brackets.
      The number is classified from its surroundings as NUMBER, YEAR, HOUR,
      MINUTE, SECOND, CENTURY, QUARTAL or HALFYEAR.  This path always ends
      with ``return``.
    * *Textual path* – ``t`` is a text token: Roman numerals with a
      quarter/half-year/century word, pointer words ("около", seasons,
      "начало"/"середина"/"конец", ...), relative quarters/half-years,
      delimiters, a letter "O" standing in for a leading zero, and month
      names.

    Args:
        t: first token; ``None`` yields ``None``.
        prev: items already recognised to the left (may be ``None``).  Note
            that ``prev[0].typ`` may be changed to HOUR when the current
            number turns out to be the minutes of an "H:MM час" pattern.
        detail_regime: when true, relative noun phrases are resolved even if
            the noun is neither a quarter nor a half-year word.

    Returns:
        A ``DateItemToken`` or ``None``.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # line structure was lost; confirm ambiguous spots against upstream.
    from pullenti.ner.measure.internal.MeasureToken import MeasureToken
    if (t is None):
        return None
    nt = Utils.asObjectOrNull(t, NumberToken)
    begin = t
    end = t
    is_in_brack = False
    # A number enclosed in brackets is treated like a plain number whose span
    # includes the brackets.
    if ((BracketHelper.can_be_start_of_sequence(t, False, False) and t.next0_ is not None and (isinstance(t.next0_, NumberToken))) and BracketHelper.can_be_end_of_sequence(t.next0_.next0_, False, None, False)):
        nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
        end = t.next0_.next0_
        is_in_brack = True
    if ((t.is_newline_before and BracketHelper.is_bracket(t, False) and (isinstance(t.next0_, NumberToken))) and BracketHelper.is_bracket(t.next0_.next0_, False)):
        nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
        end = t.next0_.next0_
        is_in_brack = True
    # ------------------------- numeric path -------------------------
    if (nt is not None):
        if (nt.int_value is None):
            return None
        # Numbers spelled as words that are nouns (not adjectives) are only
        # accepted in front of quarter / half-year words.
        if (nt.typ == NumberSpellingType.WORDS):
            if (nt.morph.class0_.is_noun and not nt.morph.class0_.is_adjective):
                if (t.next0_ is not None and ((t.next0_.is_value("КВАРТАЛ", None) or t.next0_.is_value("ПОЛУГОДИЕ", None) or t.next0_.is_value("ПІВРІЧЧЯ", None)))):
                    pass
                else:
                    return None
        if (NumberHelper.try_parse_age(nt) is not None):
            return None
        tt = None
        res = DateItemToken._new628(begin, end, DateItemToken.DateItemType.NUMBER, nt.int_value, nt.morph)
        # "20 NN" right after a month: glue into the year 20NN (only below 2030).
        if ((res.int_value == 20 and (isinstance(nt.next0_, NumberToken)) and nt.next0_.int_value is not None) and nt.next0_.length_char == 2 and prev is not None):
            num = 2000 + nt.next0_.int_value
            if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.MONTH):
                ok = False
                if (nt.whitespaces_after_count == 1):
                    ok = True
                # NOTE(review): the same property is tested twice; one operand
                # was presumably meant to be a different check — confirm.
                elif (nt.is_newline_after and nt.is_newline_after):
                    ok = True
                if (ok):
                    nt = (Utils.asObjectOrNull(nt.next0_, NumberToken))
                    res.end_token = nt
                    res.int_value = num
        # "20__ г." / "201_ г.": a year with blanks to fill in; reported as YEAR 0.
        if (res.int_value == 20 or res.int_value == 201):
            tt = t.next0_
            if (tt is not None and tt.is_char('_')):
                while tt is not None:
                    if (not tt.is_char('_')):
                        break
                    tt = tt.next0_
                tt = DateItemToken.__test_year_rus_word(tt, False)
                if (tt is not None):
                    res.int_value = 0
                    res.end_token = tt
                    res.typ = DateItemToken.DateItemType.YEAR
                    return res
        # Hours with a part-of-day word ("5 часов утра", "7 вечера", ...).
        if (res.int_value <= 12 and t.next0_ is not None and (t.whitespaces_after_count < 3)):
            tt = t.next0_
            if (tt.is_value("ЧАС", None)):
                # Skip when the number is glued to "<number><non-letter>"
                # before it; the suffix chain at the end handles that case.
                if (((isinstance(t.previous, TextToken)) and not t.previous.chars.is_letter and not t.is_whitespace_before) and (isinstance(t.previous.previous, NumberToken)) and not t.previous.is_whitespace_before):
                    pass
                else:
                    res.typ = DateItemToken.DateItemType.HOUR
                    res.end_token = tt
                    tt = tt.next0_
                    if (tt is not None and tt.is_char('.')):
                        res.end_token = tt
                        tt = tt.next0_
            first_pass3072 = True
            while True:
                if first_pass3072:
                    first_pass3072 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if (tt.is_value("УТРО", "РАНОК")):
                    res.end_token = tt
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if (tt.is_value("ВЕЧЕР", "ВЕЧІР")):
                    res.end_token = tt
                    res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if (tt.is_value("ДЕНЬ", None)):
                    res.end_token = tt
                    if (res.int_value < 10):
                        res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if (tt.is_value("НОЧЬ", "НІЧ")):
                    res.end_token = tt
                    if (res.int_value == 12):
                        res.int_value = 0
                    elif (res.int_value > 9):
                        res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if (tt.is_comma or tt.morph.class0_.is_adverb):
                    continue
                break
            if (res.typ == DateItemToken.DateItemType.HOUR):
                return res
        # Decide whether the number may be a year.
        can_be_year_ = True
        if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.MONTH):
            pass
        elif ((prev is not None and len(prev) >= 4 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM) and prev[len(prev) - 2].can_by_month):
            pass
        elif (nt.next0_ is not None and ((nt.next0_.is_value("ГОД", None) or nt.next0_.is_value("РІК", None)))):
            # "<small number> год" without date context is not a calendar year.
            if (res.int_value < 1000):
                can_be_year_ = False
        tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
        if (tt is not None and DateItemToken.__is_new_age(tt.next0_)):
            res.typ = DateItemToken.DateItemType.YEAR
            res.end_token = tt
        elif (can_be_year_):
            if (res.can_be_year or res.typ == DateItemToken.DateItemType.NUMBER):
                tt = DateItemToken.__test_year_rus_word(nt.next0_, res.is_newline_before)
                if ((tt) is not None):
                    if ((tt.is_value("Г", None) and not tt.is_whitespace_before and t.previous is not None) and ((t.previous.is_value("КОРПУС", None) or t.previous.is_value("КОРП", None)))):
                        # "Г" glued to a number after "КОРПУС"/"КОРП" is not a year mark.
                        pass
                    elif ((((nt.next0_.is_value("Г", None) and (t.whitespaces_before_count < 3) and t.previous is not None) and t.previous.is_value("Я", None) and t.previous.previous is not None) and t.previous.previous.is_char_of("\\/") and t.previous.previous.previous is not None) and t.previous.previous.previous.is_value("А", None)):
                        # "А/Я <number> Г" is rejected entirely
                        # (presumably the PO-box abbreviation — confirm).
                        return None
                    elif (nt.next0_.length_char == 1 and not res.can_be_year and ((prev is None or ((len(prev) > 0 and prev[len(prev) - 1].typ != DateItemToken.DateItemType.DELIM))))):
                        pass
                    else:
                        res.end_token = tt
                        res.typ = DateItemToken.DateItemType.YEAR
                        res.lang = tt.morph.language
            elif (tt is not None and (nt.whitespaces_after_count < 2) and (nt.end_char - nt.begin_char) == 1):
                # Number spanning exactly two characters, followed closely by a year word.
                res.end_token = tt
                res.typ = DateItemToken.DateItemType.YEAR
                res.lang = tt.morph.language
        # A preposition in front of the number may confirm a year (or a day
        # for Italian "IL") and is then included in the span.
        if (nt.previous is not None):
            if (nt.previous.is_value("В", "У") or nt.previous.is_value("К", None) or nt.previous.is_value("ДО", None)):
                tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
                if ((tt) is not None):
                    ok = False
                    if ((res.int_value < 100) and (isinstance(tt, TextToken)) and ((tt.term == "ГОДА" or tt.term == "РОКИ"))):
                        pass
                    else:
                        ok = True
                        # "ДО <number> Г": reject when one of up to 10
                        # preceding tokens starts a measure that extends past
                        # the number ("Г" is then presumably a unit — confirm).
                        if (nt.previous.is_value("ДО", None) and nt.next0_.is_value("Г", None)):
                            cou = 0
                            ttt = nt.previous.previous
                            while ttt is not None and (cou < 10):
                                mt = MeasureToken.try_parse(ttt, None, False, False, False, False)
                                if (mt is not None and mt.end_char > nt.end_char):
                                    ok = False
                                    break
                                ttt = ttt.previous
                                cou += 1
                    if (ok):
                        res.end_token = tt
                        res.typ = DateItemToken.DateItemType.YEAR
                        res.lang = tt.morph.language
                        res.begin_token = nt.previous
            elif (((nt.previous.is_value("IN", None) or nt.previous.is_value("SINCE", None))) and res.can_be_year):
                # After "IN", a number that carries units is not a year.
                uu = (NumbersWithUnitToken.try_parse(nt, None, False, False, False, False) if nt.previous.is_value("IN", None) else None)
                if (uu is not None and len(uu.units) > 0):
                    pass
                else:
                    res.typ = DateItemToken.DateItemType.YEAR
                    res.begin_token = nt.previous
            elif (nt.previous.is_value("NEL", None) or nt.previous.is_value("DEL", None)):
                if (res.can_be_year):
                    res.typ = DateItemToken.DateItemType.YEAR
                    res.lang = MorphLang.IT
                    res.begin_token = nt.previous
            elif (nt.previous.is_value("IL", None) and res.can_be_day):
                res.lang = MorphLang.IT
                res.begin_token = nt.previous
        # Unit word right after the item decides hour/minute/second/century/...
        t1 = res.end_token.next0_
        if (t1 is not None):
            if (t1.is_value("ЧАС", "ГОДИНА") or t1.is_value("HOUR", None)):
                # "<H><delim><MM> час": the current number is the minutes and
                # the first previous item becomes the hour.
                # NOTE(review): `not prev[1].is_whitespace_after` appears twice;
                # one was presumably meant to be a different check — confirm.
                if ((((prev is not None and len(prev) == 2 and prev[0].can_be_hour) and prev[1].typ == DateItemToken.DateItemType.DELIM and not prev[1].is_whitespace_after) and not prev[1].is_whitespace_after and res.int_value >= 0) and (res.int_value < 59)):
                    prev[0].typ = DateItemToken.DateItemType.HOUR
                    res.typ = DateItemToken.DateItemType.MINUTE
                    res.end_token = t1
                elif (res.int_value < 24):
                    if (t1.next0_ is not None and t1.next0_.is_char('.')):
                        t1 = t1.next0_
                    res.typ = DateItemToken.DateItemType.HOUR
                    res.end_token = t1
            # The English word must be tested on t1 (the token after the
            # number), like every other unit word in this chain; testing the
            # starting token ``t`` could never match "MINUTE".
            elif ((res.int_value < 60) and ((t1.is_value("МИНУТА", "ХВИЛИНА") or t1.is_value("МИН", None) or t1.is_value("MINUTE", None)))):
                if (t1.next0_ is not None and t1.next0_.is_char('.')):
                    t1 = t1.next0_
                res.typ = DateItemToken.DateItemType.MINUTE
                res.end_token = t1
            elif ((res.int_value < 60) and ((t1.is_value("СЕКУНДА", None) or t1.is_value("СЕК", None) or t1.is_value("SECOND", None)))):
                if (t1.next0_ is not None and t1.next0_.is_char('.')):
                    t1 = t1.next0_
                res.typ = DateItemToken.DateItemType.SECOND
                res.end_token = t1
            elif ((res.int_value < 30) and ((t1.is_value("ВЕК", "ВІК") or t1.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")))):
                res.typ = DateItemToken.DateItemType.CENTURY
                res.end_token = t1
            elif (res.int_value <= 4 and t1.is_value("КВАРТАЛ", None)):
                res.typ = DateItemToken.DateItemType.QUARTAL
                res.end_token = t1
            elif (res.int_value <= 2 and ((t1.is_value("ПОЛУГОДИЕ", None) or t1.is_value("ПІВРІЧЧЯ", None)))):
                res.typ = DateItemToken.DateItemType.HALFYEAR
                res.end_token = t1
        return res
    # ------------------------- textual path -------------------------
    t0 = Utils.asObjectOrNull(t, TextToken)
    if (t0 is None):
        return None
    txt = t0.get_source_text()
    # Roman numerals: Latin I / X / V and the Cyrillic look-alike 'Х'.
    if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х') or txt[0] == 'V'):
        lat = NumberHelper.try_parse_roman(t)
        if (lat is not None and lat.end_token.next0_ is not None and lat.int_value is not None):
            val = lat.int_value
            tt = lat.end_token.next0_
            if (tt.is_value("КВАРТАЛ", None) and val > 0 and val <= 4):
                return DateItemToken._new629(t, tt, DateItemToken.DateItemType.QUARTAL, val)
            if (tt.is_value("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0 and val <= 2):
                return DateItemToken._new629(t, lat.end_token.next0_, DateItemToken.DateItemType.HALFYEAR, val)
            if (tt.is_value("ВЕК", "ВІК") or tt.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                return DateItemToken._new629(t, lat.end_token.next0_, DateItemToken.DateItemType.CENTURY, val)
            # Abbreviated "в.": accepted after a pointer item or before an era mark.
            if (tt.is_value("В", None) and tt.next0_ is not None and tt.next0_.is_char('.')):
                if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.POINTER):
                    return DateItemToken._new629(t, tt.next0_, DateItemToken.DateItemType.CENTURY, val)
                if (DateItemToken.__is_new_age(tt.next0_.next0_)):
                    return DateItemToken._new629(t, tt.next0_, DateItemToken.DateItemType.CENTURY, val)
            # "X-XI век": first part of a century range; new_age is taken
            # from parsing the second part.
            if (tt.is_hiphen):
                lat2 = NumberHelper.try_parse_roman(tt.next0_)
                if (lat2 is not None and lat2.int_value is not None and lat2.end_token.next0_ is not None):
                    if (lat2.end_token.next0_.is_value("ВЕК", "ВІК") or lat2.end_token.next0_.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                        ddd = DateItemToken.try_attach(tt.next0_, None, False)
                        return DateItemToken._new634(t, lat.end_token, DateItemToken.DateItemType.CENTURY, val, ((ddd.new_age if ddd is not None else 0)))
    # Single-word pointers.
    if (t is not None and t.is_value("НАПРИКІНЦІ", None)):
        return DateItemToken._new635(t, t, DateItemToken.DateItemType.POINTER, "конец")
    if (t is not None and t.is_value("ДОНЕДАВНА", None)):
        return DateItemToken._new635(t, t, DateItemToken.DateItemType.POINTER, "сегодня")
    # "About" pointers are recognised only when ``prev`` is None.
    if (prev is None):
        if (t is not None):
            if (t.is_value("ОКОЛО", "БІЛЯ") or t.is_value("ПРИМЕРНО", "ПРИБЛИЗНО") or t.is_value("ABOUT", None)):
                return DateItemToken._new635(t, t, DateItemToken.DateItemType.POINTER, "около")
        if (t.is_value("ОК", None) or t.is_value("OK", None)):
            if (t.next0_ is not None and t.next0_.is_char('.')):
                return DateItemToken._new635(t, t.next0_, DateItemToken.DateItemType.POINTER, "около")
            return DateItemToken._new635(t, t, DateItemToken.DateItemType.POINTER, "около")
    # Seasons.  For Russian text a SUMMER match is kept only for the exact
    # forms "ЛЕТОМ", "ЛЕТА" and "ЛЕТО".
    tok = DateItemToken.M_SEASONS.try_parse(t, TerminParseAttr.NO)
    if ((tok is not None and (Utils.valToEnum(tok.termin.tag, DatePointerType)) == DatePointerType.SUMMER and t.morph.language.is_ru) and (isinstance(t, TextToken))):
        str0_ = t.term
        if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
            tok = (None)
    if (tok is not None):
        return DateItemToken._new629(t, tok.end_token, DateItemToken.DateItemType.POINTER, Utils.valToEnum(tok.termin.tag, DatePointerType))
    npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
    if (npt is not None):
        # Season as the last word of a noun phrase.
        tok = DateItemToken.M_SEASONS.try_parse(npt.end_token, TerminParseAttr.NO)
        if ((tok is not None and (Utils.valToEnum(tok.termin.tag, DatePointerType)) == DatePointerType.SUMMER and t.morph.language.is_ru) and (isinstance(t, TextToken))):
            str0_ = t.term
            if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
                tok = (None)
        if (tok is not None):
            return DateItemToken._new629(t, tok.end_token, DateItemToken.DateItemType.POINTER, Utils.valToEnum(tok.termin.tag, DatePointerType))
        typ_ = DateItemToken.DateItemType.NUMBER
        if (npt.noun.is_value("КВАРТАЛ", None)):
            typ_ = DateItemToken.DateItemType.QUARTAL
        elif (npt.end_token.is_value("ПОЛУГОДИЕ", None) or npt.end_token.is_value("ПІВРІЧЧЯ", None)):
            typ_ = DateItemToken.DateItemType.HALFYEAR
        elif (npt.end_token.is_value("НАЧАЛО", None) or npt.end_token.is_value("ПОЧАТОК", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "начало")
        elif (npt.end_token.is_value("СЕРЕДИНА", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "середина")
        elif (npt.end_token.is_value("КОНЕЦ", None) or npt.end_token.is_value("КІНЕЦЬ", None) or npt.end_token.is_value("НАПРИКІНЕЦЬ", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "конец")
        elif (npt.end_token.is_value("ВРЕМЯ", None) and len(npt.adjectives) > 0 and npt.end_token.previous.is_value("НАСТОЯЩЕЕ", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "сегодня")
        elif (npt.end_token.is_value("ЧАС", None) and len(npt.adjectives) > 0 and npt.end_token.previous.is_value("ДАНИЙ", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "сегодня")
        # Relative quarter / half-year ("last", "previous", "next").
        if (typ_ != DateItemToken.DateItemType.NUMBER or detail_regime):
            delta = 0
            if (len(npt.adjectives) > 0):
                if (npt.adjectives[0].is_value("ПОСЛЕДНИЙ", "ОСТАННІЙ")):
                    return DateItemToken._new629(t0, npt.end_token, typ_, (4 if typ_ == DateItemToken.DateItemType.QUARTAL else 2))
                if (npt.adjectives[0].is_value("ПРЕДЫДУЩИЙ", "ПОПЕРЕДНІЙ") or npt.adjectives[0].is_value("ПРОШЛЫЙ", None)):
                    delta = -1
                elif (npt.adjectives[0].is_value("СЛЕДУЮЩИЙ", None) or npt.adjectives[0].is_value("ПОСЛЕДУЮЩИЙ", None) or npt.adjectives[0].is_value("НАСТУПНИЙ", None)):
                    delta = 1
                else:
                    return None
            # Walk backwards to the nearest date range that provides a base
            # quarter / half-year number and shift it by ``delta``.
            # NOTE(review): ``cou`` is never incremented, so the `cou > 200`
            # limit never triggers and the walk can reach the start of the
            # text — confirm whether a bound was intended.
            cou = 0
            tt = t.previous
            first_pass3073 = True
            while True:
                if first_pass3073:
                    first_pass3073 = False
                else:
                    tt = tt.previous
                if (not (tt is not None)):
                    break
                if (cou > 200):
                    break
                dr = Utils.asObjectOrNull(tt.get_referent(), DateRangeReferent)
                if (dr is None):
                    continue
                if (typ_ == DateItemToken.DateItemType.QUARTAL):
                    ii = dr.quarter_number
                    if (ii < 1):
                        continue
                    ii += delta
                    if ((ii < 1) or ii > 4):
                        continue
                    return DateItemToken._new629(t0, npt.end_token, typ_, ii)
                if (typ_ == DateItemToken.DateItemType.HALFYEAR):
                    ii = dr.halfyear_number
                    if (ii < 1):
                        continue
                    ii += delta
                    if ((ii < 1) or ii > 2):
                        continue
                    return DateItemToken._new629(t0, npt.end_token, typ_, ii)
    term = t0.term
    # Punctuation: only a few characters are valid date delimiters.
    if (not str.isalnum(term[0])):
        if (t0.is_char_of(".\\/:") or t0.is_hiphen):
            return DateItemToken._new635(t0, t0, DateItemToken.DateItemType.DELIM, term)
        elif (t0.is_char(',')):
            return DateItemToken._new635(t0, t0, DateItemToken.DateItemType.DELIM, term)
        else:
            return None
    # Letter "O" (Latin or Cyrillic) glued to a one-character number is
    # treated as part of the number (presumably a mistyped leading zero).
    if (term == "O" or term == "О"):
        if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after and len(t.next0_.value) == 1):
            return DateItemToken._new629(t, t.next0_, DateItemToken.DateItemType.NUMBER, t.next0_.int_value)
    # Month names; a match without a tag is skipped and the lookup is retried
    # on the token after it.
    if (str.isalpha(term[0])):
        inf = DateItemToken.M_MONTHES.try_parse(t, TerminParseAttr.NO)
        if (inf is not None and inf.termin.tag is None):
            inf = DateItemToken.M_MONTHES.try_parse(inf.end_token.next0_, TerminParseAttr.NO)
        if (inf is not None and (isinstance(inf.termin.tag, int))):
            return DateItemToken._new653(inf.begin_token, inf.end_token, DateItemToken.DateItemType.MONTH, inf.termin.tag, inf.termin.lang)
    return None
def create_nickname(pr : 'PersonReferent', t : 'Token') -> 'Token':
    """Read nickname(s) that follow a nickname keyword and store them on ``pr``.

    Starting at ``t`` the function skips hyphens, commas, the characters
    ``.:;``, prepositions and an opening round bracket until it has passed at
    least one nickname keyword.  What follows is read either as a series of
    bracketed (quoted) values or as a series of short person-item sequences /
    single-token referents; every value found is added to ``pr`` as an
    ``ATTR_NICKNAME`` slot.

    Returns the last token consumed, or None when no keyword was seen, the
    text ended right after the keyword, or nothing could be read.
    """
    # (primary spelling, alternative spelling) pairs passed to Token.is_value.
    keywords = (
        ("ПРОЗВИЩЕ", "ПРІЗВИСЬКО"),
        ("КЛИЧКА", None),
        ("ПСЕВДОНИМ", "ПСЕВДОНІМ"),
        ("ПСЕВДО", None),
        ("ПОЗЫВНОЙ", "ПОЗИВНИЙ"),
    )
    keyword_seen = False
    in_round_bracket = False
    started = False
    while True:
        if started:
            t = t.next0_
        started = True
        if t is None:
            break
        if t.is_hiphen or t.is_comma or t.is_char_of(".:;"):
            continue
        if t.morph.class0_.is_preposition:
            continue
        if t.is_char('('):
            in_round_bracket = True
            continue
        if any(t.is_value(word, alt) for word, alt in keywords):
            keyword_seen = True
            continue
        break
    if t is None or not keyword_seen:
        return None

    def past_closing_bracket(tok):
        # Swallow the ')' that matches a '(' seen before the keyword.
        if in_round_bracket and tok.next0_ is not None and tok.next0_.is_char(')'):
            return tok.next0_
        return tok

    if BracketHelper.is_bracket(t, True):
        seq = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if seq is None:
            return None
        nick = MiscHelper.get_text_value(seq.begin_token.next0_, seq.end_token.previous, GetTextAttr.NO)
        if nick is None:
            return None
        pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
        t = seq.end_token
        # Further bracketed values may follow, separated by commas / "and".
        cur = t.next0_
        while cur is not None:
            if not cur.is_comma_and:
                if not BracketHelper.is_bracket(cur, True):
                    break
                seq = BracketHelper.try_parse(cur, BracketParseAttr.NO, 100)
                if seq is None:
                    break
                nick = MiscHelper.get_text_value(seq.begin_token.next0_, seq.end_token.previous, GetTextAttr.NO)
                if nick is not None:
                    pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                cur = seq.end_token
                t = cur
            cur = cur.next0_
        return past_closing_bracket(t)

    last_consumed = None
    started = False
    while True:
        if started:
            t = t.next0_
        started = True
        if t is None:
            break
        if t.is_comma_and:
            continue
        # After the first value, a lower-case word ends the list.
        if last_consumed is not None and t.chars.is_all_lower:
            break
        if t.whitespaces_before_count > 2:
            break
        items = PersonItemToken.try_attach_list(t, None, PersonItemToken.ParseAttr.NO, 10)
        if items is not None and len(items) in (1, 2):
            tail = items[len(items) - 1]
            nick = MiscHelper.get_text_value(items[0].begin_token, tail.end_token, GetTextAttr.NO)
            if nick is not None:
                pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
            t = past_closing_bracket(tail.end_token)
            last_consumed = t
            continue
        if isinstance(t, ReferentToken) and not t.chars.is_all_lower and t.begin_token == t.end_token:
            text = MiscHelper.get_text_value_of_meta_token(Utils.asObjectOrNull(t, ReferentToken), GetTextAttr.NO)
            pr.add_slot(PersonReferent.ATTR_NICKNAME, text, False, 0)
            t = past_closing_bracket(t)
            last_consumed = t
            continue
        break
    return last_consumed
def attach_first(self, p : 'InstrumentParticipantReferent', min_char : int, max_char : int) -> 'ReferentToken':
    """Build the ReferentToken of participant ``p`` around this token.

    Three passes are made:

    1. Backwards from the token preceding ``self.begin_token`` (never before
       ``min_char``): referents of the linked kinds are collected and the
       start of the span is moved onto them.  The collected referents are
       stored on ``p`` as ``ATTR_REF``; a person / person property that
       follows a leading organization is stored as ``ATTR_DELEGATE``.
    2. Forwards from the token following ``self.end_token`` (not beyond
       ``max_char``; ``0`` disables the limit): further referents and
       contract grounds extend the span until a stop condition is met.
    3. The final span is re-scanned for contract grounds.

    Returns the ReferentToken that wraps ``p``.
    """
    linked_types = (OrganizationReferent, PhoneReferent, PersonReferent, PersonPropertyReferent,
                    AddressReferent, UriReferent, PersonIdentityReferent, BankDataReferent)
    person_types = (PersonReferent, PersonPropertyReferent)

    # Pass 1: look backwards for referents that belong to the participant.
    tt0 = self.begin_token
    refs = list()
    t = tt0.previous
    first_pass = True
    while True:
        if first_pass:
            first_pass = False
        else:
            t = t.previous
        if t is None or t.begin_char < min_char:
            break
        if t.is_newline_after:
            if t.newlines_after_count > 1:
                break
            if isinstance(t.next0_, NumberToken):
                break
        tt = ParticipantToken.__try_attach_contract_ground(t, p, False)
        if tt is not None:
            continue
        r = t.get_referent()
        if isinstance(r, linked_types):
            if r not in refs:
                refs.insert(0, r)
            tt0 = t
    for r in refs:
        if r != refs[0] and isinstance(refs[0], OrganizationReferent) and isinstance(r, person_types):
            p.add_slot(InstrumentParticipantReferent.ATTR_DELEGATE, r, False, 0)
        else:
            p.add_slot(InstrumentParticipantReferent.ATTR_REF, r, False, 0)

    # Pass 2: extend the span forwards.
    rt = ReferentToken(p, tt0, self.end_token)
    t = self.end_token.next0_
    if BracketHelper.is_bracket(t, False):
        t = t.next0_
    if t is not None and t.is_char(','):
        t = t.next0_
    first_pass = True
    while True:
        if first_pass:
            first_pass = False
        else:
            t = t.next0_
        if t is None or not (max_char == 0 or t.begin_char <= max_char):
            break
        if t.is_value("СТОРОНА", None):
            break
        r = t.get_referent()
        if isinstance(r, linked_types):
            # "<person property>, <person>" on one line: fold the property
            # into the person and continue with the person referent.
            if (isinstance(r, PersonPropertyReferent) and t.next0_ is not None and t.next0_.is_comma
                    and isinstance(t.next0_.next0_, ReferentToken)
                    and isinstance(t.next0_.next0_.get_referent(), PersonReferent)
                    and not t.next0_.is_newline_after):
                pe = Utils.asObjectOrNull(t.next0_.next0_.get_referent(), PersonReferent)
                pe.add_slot(PersonReferent.ATTR_ATTR, r, False, 0)
                r = pe
                t = t.next0_.next0_
            is_delegate = False
            if t.previous.is_value("ЛИЦО", None) or t.previous.is_value("ИМЯ", None):
                is_delegate = True
            if (t.previous.is_value("КОТОРЫЙ", None) and t.previous.previous is not None
                    and (t.previous.previous.is_value("ИМЯ", None) or t.previous.previous.is_value("ЛИЦО", None))):
                is_delegate = True
            slot_name = (InstrumentParticipantReferent.ATTR_DELEGATE
                         if isinstance(r, person_types) and is_delegate
                         else InstrumentParticipantReferent.ATTR_REF)
            p.add_slot(slot_name, r, False, 0)
            rt.end_token = t
            continue
        tt = ParticipantToken.__try_attach_contract_ground(t, p, False)
        if tt is not None:
            rt.end_token = tt
            t = rt.end_token
            if rt.begin_char == tt.begin_char:
                rt.begin_token = tt
            continue
        # Connective phrases that introduce a representative are stepped over.
        if t.is_value("В", None) and t.next0_ is not None and t.next0_.is_value("ЛИЦО", None):
            t = t.next0_
            continue
        if t.is_value("ОТ", None) and t.next0_ is not None and t.next0_.is_value("ИМЯ", None):
            t = t.next0_
            continue
        if t.is_value("ПО", None) and t.next0_ is not None and t.next0_.is_value("ПОРУЧЕНИЕ", None):
            t = t.next0_
            continue
        if t.is_newline_before:
            break
        if t.get_morph_class_in_dictionary() == MorphClass.VERB:
            # Only these verbs may occur inside a participant description.
            if ((not t.is_value("УДОСТОВЕРЯТЬ", None) and not t.is_value("ПРОЖИВАТЬ", None)
                    and not t.is_value("ЗАРЕГИСТРИРОВАТЬ", None)) and not t.is_value("ДЕЙСТВОВАТЬ", None)):
                break
        if t.is_and and t.previous is not None and t.previous.is_comma:
            break
        # Fix: the original dereferenced t.next0_ without a None check and
        # failed when the conjunction was the last token of the text.
        if t.is_and and t.next0_ is not None and t.next0_.get_referent() is not None:
            if isinstance(t.next0_.get_referent(), OrganizationReferent):
                break
            pe = Utils.asObjectOrNull(t.next0_.get_referent(), PersonReferent)
            if pe is not None:
                has_ip = False
                for s in pe.slots:
                    if s.type_name == PersonReferent.ATTR_ATTR:
                        if str(s.value).startswith("индивидуальный предприниматель"):
                            has_ip = True
                            break
                if has_ip:
                    break

    # Pass 3: pick up contract grounds inside (or overlapping the end of) the span.
    t = rt.begin_token
    while t is not None and t.end_char <= rt.end_char:
        tt = ParticipantToken.__try_attach_contract_ground(t, p, True)
        if tt is not None:
            if tt.end_char > rt.end_char:
                rt.end_token = tt
            t = tt
        t = t.next0_
    return rt
def get_name_ex(begin: 'Token', end: 'Token', cla: 'MorphClass', mc: 'MorphCase', gender: 'MorphGender' = MorphGender.UNDEFINED, ignore_brackets_and_hiphens: bool = False, ignore_geo_referent: bool = False) -> str:
    """Render the tokens ``begin`` .. ``end`` as a single name string.

    Text tokens contribute a normal form chosen from their morphological
    word forms according to ``cla`` (class), ``mc`` (case) and ``gender``,
    falling back to the token's term.  Number tokens contribute their value
    and meta tokens are rendered recursively.

    ignore_brackets_and_hiphens: brackets are dropped (a "(<["-sequence is
        rendered recursively and appended in its own brackets) and hyphens
        surrounded by whitespace are skipped.
    ignore_geo_referent: meta tokens other than ``begin`` whose referent has
        type name "GEO" are skipped.

    Returns None for missing or inverted bounds and when nothing was written.
    """
    if begin is None or end is None:
        return None
    if (begin.end_char > end.begin_char and begin != end):
        return None

    out = io.StringIO()

    def emit(text):
        print(text, end="", file=out)

    def emit_gap(tok):
        # Separate from previous output unless the output is empty or the
        # token is glued to a trailing hyphen.
        if out.tell() == 0:
            return
        if not tok.is_whitespace_before and Utils.getCharAtStringIO(out, out.tell() - 1) == '-':
            return
        emit(' ')

    def pick_normal_form(text_tok, use_gender):
        # Normal case of a word form matching the filters; a form equal to
        # the token's term overrides an earlier choice.
        chosen = None
        for item in text_tok.morph.items:
            form = Utils.asObjectOrNull(item, MorphWordForm)
            if form is None:
                continue
            if cla.value != (0) and ((form.class0_.value) & (cla.value)) == 0:
                continue
            if not mc.is_undefined and ((form.case_) & mc).is_undefined:
                continue
            if use_gender and gender != MorphGender.UNDEFINED:
                if ((form.gender) & (gender)) == (MorphGender.UNDEFINED):
                    continue
            if chosen is None or form.normal_case == text_tok.term:
                chosen = form.normal_case
        return chosen

    pending_prefix = None
    t = begin
    advance = False
    while True:
        if advance:
            t = t.next0_
        advance = True
        if t is None or not (t.end_char <= end.end_char):
            break
        if out.tell() > 1000:
            break
        if t.is_table_control_char:
            continue

        if ignore_brackets_and_hiphens:
            if BracketHelper.is_bracket(t, False):
                if t == end:
                    break
                if t.is_char_of("(<["):
                    seq = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                    if seq is not None and seq.end_char <= end.end_char:
                        inner = ProperNameHelper.get_name_ex(
                            seq.begin_token.next0_, seq.end_token.previous,
                            MorphClass.UNDEFINED, MorphCase.UNDEFINED, MorphGender.UNDEFINED,
                            ignore_brackets_and_hiphens, False)
                        if inner is not None:
                            # A closing "(x)" with a single non-letter,
                            # non-referent token is dropped.
                            lone_mark = ((seq.end_char == end.end_char
                                          and seq.begin_token.next0_ == seq.end_token.previous
                                          and not seq.begin_token.next0_.chars.is_letter)
                                         and not (isinstance(seq.begin_token.next0_, ReferentToken)))
                            if not lone_mark:
                                print(" {0}{1}{2}".format(
                                    t.get_source_text(), inner, seq.end_token.get_source_text()),
                                    end="", file=out, flush=True)
                        t = seq.end_token
                continue
            if t.is_hiphen:
                if t == end:
                    break
                if t.is_whitespace_before or t.is_whitespace_after:
                    continue

        text_tok = Utils.asObjectOrNull(t, TextToken)
        if text_tok is not None:
            if not ignore_brackets_and_hiphens:
                # "word-word": remember the left part and join it later.
                if ((text_tok.next0_ is not None and text_tok.next0_.is_hiphen
                     and (isinstance(text_tok.next0_.next0_, TextToken)))
                        and text_tok != end and text_tok.next0_ != end):
                    if pending_prefix is None:
                        pending_prefix = text_tok.term
                    else:
                        pending_prefix = "{0}-{1}".format(pending_prefix, text_tok.term)
                    t = text_tok.next0_
                    if t == end:
                        break
                    continue
            word = None
            if cla.value != (0) or not mc.is_undefined or gender != MorphGender.UNDEFINED:
                word = pick_normal_form(text_tok, True)
                if word is None and gender != MorphGender.UNDEFINED:
                    # Retry without the gender restriction.
                    word = pick_normal_form(text_tok, False)
            if word is None:
                word = text_tok.term
                if text_tok.chars.is_last_lower and text_tok.length_char > 2:
                    # Keep the source text up to its last upper-case letter.
                    word = text_tok.get_source_text()
                    for i in range(len(word) - 1, -1, -1):
                        if str.isupper(word[i]):
                            word = word[0:0 + i + 1]
                            break
            if pending_prefix is not None:
                joiner = " " if ignore_brackets_and_hiphens else "-"
                word = "{0}{1}{2}".format(pending_prefix, joiner, word)
            pending_prefix = None
            if out.tell() > 0 and len(word) > 0:
                if str.isalnum(word[0]):
                    last_char = Utils.getCharAtStringIO(out, out.tell() - 1)
                    if not (last_char == '-'):
                        emit(' ')
                elif (not ignore_brackets_and_hiphens
                      and BracketHelper.can_be_start_of_sequence(text_tok, False, False)):
                    emit(' ')
            emit(word)
        elif isinstance(t, NumberToken):
            emit_gap(t)
            num = Utils.asObjectOrNull(t, NumberToken)
            if ((t.morph.class0_.is_adjective and num.typ == NumberSpellingType.WORDS
                 and num.begin_token == num.end_token) and (isinstance(num.begin_token, TextToken))):
                emit(num.begin_token.term)
            else:
                emit(num.value)
        elif isinstance(t, MetaToken):
            if ((ignore_geo_referent and t != begin and t.get_referent() is not None)
                    and t.get_referent().type_name == "GEO"):
                continue
            nested = ProperNameHelper.get_name_ex(
                t.begin_token, t.end_token, cla, mc, gender,
                ignore_brackets_and_hiphens, ignore_geo_referent)
            if not Utils.isNullOrEmpty(nested):
                emit_gap(t)
                emit(nested)

        if t == end:
            break

    if out.tell() == 0:
        return None
    return Utils.toStringStringIO(out)
def process(self, kit: 'AnalysisKit') -> None:
    """Run the weapon analysis over the token chain of ``kit``.

    First pass: every position is tried with ``WeaponItemToken.try_parse_list``
    and the attached referents are registered and embedded.  Their model
    names (alone and prefixed with the brand) and their names are indexed.

    Second pass (skipped when nothing was indexed): bare occurrences of an
    indexed name in brackets, or of an indexed model / name in plain text,
    are embedded as further tokens of the matching referent.
    """
    ad = kit.get_analyzer_data(self)
    models = TerminCollection()
    objs_by_model = dict()
    obj_by_names = TerminCollection()

    def index_model(referent, model_text):
        # Index the model as written, then once more as "<brand> <model>".
        for attempt in range(2):
            if not str.isdigit(model_text[0]):
                holder = RefOutArgWrapper(None)
                known = Utils.tryGetValue(objs_by_model, model_text, holder)
                bucket = holder.value
                if not known:
                    bucket = list()
                    objs_by_model[model_text] = bucket
                if referent not in bucket:
                    bucket.append(referent)
                models.add_string(model_text, bucket, None, False)
            if attempt > 0:
                break
            brand = referent.get_string_value(WeaponReferent.ATTR_BRAND)
            if brand is None:
                break
            model_text = "{0} {1}".format(brand, model_text)

    # Pass 1: attach explicit weapon descriptions.
    t = kit.first_token
    advance = False
    while True:
        if advance:
            t = t.next0_
        advance = True
        if t is None:
            break
        items = WeaponItemToken.try_parse_list(t, 10)
        if items is None:
            continue
        attached = self.__try_attach(items, False)
        if attached is None:
            continue
        for rt in attached:
            rt.referent = ad.register_referent(rt.referent)
            kit.embed_token(rt)
            t = rt
            for slot in rt.referent.slots:
                if slot.type_name == WeaponReferent.ATTR_MODEL:
                    index_model(rt.referent, str(slot.value))
                elif slot.type_name == WeaponReferent.ATTR_NAME:
                    obj_by_names.add(Termin._new100(str(slot.value), rt.referent))

    if len(objs_by_model) == 0 and len(obj_by_names.termins) == 0:
        return

    # Pass 2: find bare mentions of the indexed models and names.
    t = kit.first_token
    advance = False
    while True:
        if advance:
            t = t.next0_
        advance = True
        if t is None:
            break
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 10)
        if br is not None:
            named = obj_by_names.try_parse(t.next0_, TerminParseAttr.NO)
            if named is not None and named.end_token.next0_ == br.end_token:
                wrapped = ReferentToken(
                    Utils.asObjectOrNull(named.termin.tag, Referent),
                    br.begin_token, br.end_token)
                kit.embed_token(wrapped)
                t = wrapped
                continue
        if not isinstance(t, TextToken):
            continue
        if not t.chars.is_letter:
            continue
        hit = models.try_parse(t, TerminParseAttr.NO)
        if hit is None and not t.chars.is_all_lower:
            hit = obj_by_names.try_parse(t, TerminParseAttr.NO)
        if hit is None:
            continue
        if not hit.is_whitespace_after:
            # Glued to the next token: accept only ",.)" or a bracket after it.
            if hit.end_token.next0_ is None or not hit.end_token.next0_.is_char_of(",.)"):
                if not BracketHelper.is_bracket(hit.end_token.next0_, False):
                    continue
        candidates = Utils.asObjectOrNull(hit.termin.tag, list)
        if candidates is not None and len(candidates) == 1:
            target = candidates[0]
        else:
            target = Utils.asObjectOrNull(hit.termin.tag, Referent)
        if target is None:
            continue
        brand_item = WeaponItemToken.try_parse(hit.begin_token.previous, None, False, True)
        if brand_item is not None and brand_item.typ == WeaponItemToken.Typs.BRAND:
            target.add_slot(WeaponReferent.ATTR_BRAND, brand_item.value, False, 0)
            hit.begin_token = brand_item.begin_token
        wrapped = ReferentToken(target, hit.begin_token, hit.end_token)
        kit.embed_token(wrapped)
        t = wrapped
def parse(t : 'Token', max_char : int=0, prev : 'InstrToken'=None) -> 'InstrToken':
    """Classify the line starting at ``t`` and return it as an InstrToken.

    Leading table control characters (other than 0x1F) are skipped.  The
    tokens of the line are then examined one by one; as soon as one of the
    recognized line kinds is found (table of contents, person, document
    type, registration number, organization, territory, date, appendix,
    approval, directive) a token of that type is returned.  Otherwise the
    whole scanned span is returned with type ``ILTypes.UNDEFINED`` (or
    ``ILTypes.QUESTION``), with ``no_words`` / ``has_verb`` filled in.

    t: first token of the line.
    max_char: when positive, tokens beginning after this position are not
        examined.
    prev: the previously parsed InstrToken, if any; consulted for the
        PERSON and APPENDIX heuristics.

    Returns None when ``t`` is None (also after skipping control characters)
    or when no token was accepted.
    """
    from pullenti.ner.instrument.internal.InstrToken1 import InstrToken1
    is_start_of_line = False
    t00 = t
    if (t is not None):
        is_start_of_line = t00.is_newline_before
        # Skip table control characters; a newline among them marks a line start.
        while t is not None:
            if (t.is_table_control_char and not t.is_char(chr(0x1F))):
                if (t.is_newline_after and not is_start_of_line):
                    is_start_of_line = True
                t = t.next0_
            else:
                break
    if (t is None):
        return None
    if (t.is_newline_before):
        is_start_of_line = True
    if (is_start_of_line):
        # Table-of-contents heading.
        if ((t.is_value("СОДЕРЖИМОЕ", "ВМІСТ") or t.is_value("СОДЕРЖАНИЕ", "ЗМІСТ") or t.is_value("ОГЛАВЛЕНИЕ", "ЗМІСТ")) or ((t.is_value("СПИСОК", None) and t.next0_ is not None and t.next0_.is_value("РАЗДЕЛ", None)))):
            cont = InstrToken1.parse(t, True, None, 0, None, False, 0, False, False)
            if (cont is not None and cont.typ == InstrToken1.Types.INDEX):
                return InstrToken(t, cont.end_token)
    t0 = t
    t1 = None
    has_word = False
    first_pass3255 = True
    while True:
        if first_pass3255:
            first_pass3255 = False
        else:
            t = t.next0_
        if (not (t is not None)):
            break
        if (t.is_newline_before and t != t0):
            break
        if (max_char > 0 and t.begin_char > max_char):
            break
        if (is_start_of_line and t == t0):
            # Person-related checks, done only for the first token of a line.
            if (t.is_value("ГЛАВА", None)):
                next0__ = InstrToken.parse(t.next0_, 0, None)
                if (next0__ is not None and next0__.typ == ILTypes.PERSON):
                    next0__.begin_token = t
                    return next0__
            tt = None
            if ((isinstance(t.get_referent(), PersonReferent)) or (isinstance(t.get_referent(), PersonPropertyReferent)) or (isinstance(t.get_referent(), InstrumentParticipantReferent))):
                return InstrToken.__correct_person(InstrToken._new1511(t00, t, ILTypes.PERSON, t))
            is_ref = False
            if (isinstance(t.get_referent(), PersonPropertyReferent)):
                tt = t.next0_
                is_ref = True
            elif (prev is not None and prev.typ == ILTypes.PERSON):
                rt = t.kit.process_referent(PersonAnalyzer.ANALYZER_NAME, t)
                if (rt is not None):
                    if (isinstance(rt.referent, PersonReferent)):
                        return InstrToken._new1512(t00, rt.end_token, ILTypes.PERSON)
                    tt = rt.end_token.next0_
            cou = 0
            t11 = (None if tt is None else tt.previous)
            # Look ahead (at most a few lines) for a person referent.
            first_pass3256 = True
            while True:
                if first_pass3256:
                    first_pass3256 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if (tt.is_table_control_char):
                    continue
                re = tt.get_referent()
                if (isinstance(re, PersonReferent)):
                    return InstrToken._new1511(t00, tt, ILTypes.PERSON, tt)
                if (isinstance(re, GeoReferent)):
                    t11 = tt
                    continue
                if (re is not None):
                    break
                if (DecreeToken.is_keyword(tt, False) is not None):
                    break
                if (tt.is_newline_before):
                    cou += 1
                    if (cou > 4):
                        break
            if (tt is None and is_ref):
                return InstrToken._new1511(t00, Utils.ifNotNull(t11, t), ILTypes.PERSON, t)
        dt = DecreeToken.try_attach(t, None, False)
        if (dt is not None):
            if (dt.typ == DecreeToken.ItemType.TYP and not t.chars.is_all_lower):
                if (t != t0):
                    break
                # A document type followed by a pure verb on the same line is a sentence, not a title.
                has_verb_ = False
                tt = dt.end_token
                while tt is not None:
                    if (tt.is_newline_before):
                        break
                    elif ((isinstance(tt, TextToken)) and tt.is_pure_verb):
                        has_verb_ = True
                        break
                    tt = tt.next0_
                if (not has_verb_):
                    res2 = InstrToken._new1515(t0, dt.end_token, ILTypes.TYP, Utils.ifNotNull(dt.full_value, dt.value))
                    if (res2.value == "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ" or res2.value == "ДОДАТКОВА УГОДА"):
                        if (res2.begin_char > 500 and res2.newlines_before_count > 1):
                            res2.typ = ILTypes.APPENDIX
                    return res2
            if (dt.typ == DecreeToken.ItemType.NUMBER):
                if (t != t0):
                    break
                return InstrToken._new1515(t0, dt.end_token, ILTypes.REGNUMBER, dt.value)
            if (dt.typ == DecreeToken.ItemType.ORG):
                if (t != t0):
                    break
                return InstrToken._new1517(t0, dt.end_token, ILTypes.ORGANIZATION, dt.ref, dt.value)
            if (dt.typ == DecreeToken.ItemType.TERR):
                if (t != t0):
                    break
                re = InstrToken._new1517(t0, dt.end_token, ILTypes.GEO, dt.ref, dt.value)
                t1 = re.end_token.next0_
                if (t1 is not None and t1.is_char(',')):
                    t1 = t1.next0_
                # Extend the place over "КРЕМЛЬ" or "ДОМ СОВЕТ [geo]".
                if (t1 is not None and t1.is_value("КРЕМЛЬ", None)):
                    re.end_token = t1
                elif ((t1 is not None and t1.is_value("ДОМ", "БУДИНОК") and t1.next0_ is not None) and t1.next0_.is_value("СОВЕТ", "РАД")):
                    re.end_token = t1.next0_
                    if (t1.next0_.next0_ is not None and (isinstance(t1.next0_.next0_.get_referent(), GeoReferent))):
                        re.end_token = t1.next0_.next0_
                return re
            if (dt.typ == DecreeToken.ItemType.OWNER):
                if (t != t0):
                    break
                if (dt.ref is not None and str(dt.ref.referent).startswith("агент")):
                    dt = (None)
                if (dt is not None):
                    res1 = InstrToken._new1517(t0, dt.end_token, ILTypes.PERSON, dt.ref, dt.value)
                    return InstrToken.__correct_person(res1)
        if (BracketHelper.can_be_start_of_sequence(t, False, False)):
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                t1 = br.end_token
                t = t1
                continue
            if (t.next0_ is not None and BracketHelper.can_be_end_of_sequence(t.next0_, False, None, False)):
                t1 = t.next0_
                t = t1
                continue
        if (isinstance(t, TextToken)):
            if (t.is_char('_')):
                t1 = t
                continue
        r = t.get_referent()
        if (isinstance(r, DateReferent)):
            tt = t
            if (tt.next0_ is not None and tt.next0_.is_char_of(",;")):
                tt = tt.next0_
            if (not t.is_newline_before and not tt.is_newline_after):
                t1 = tt
                continue
            if (not has_word):
                return InstrToken._new1511(t, tt, ILTypes.DATE, t)
            if (t != t0):
                break
        has_word = True
        if (isinstance(r, InstrumentParticipantReferent)):
            # A participant that embeds an organization, bank data or URI is not treated as a person line.
            tt = t.begin_token
            first_pass3257 = True
            while True:
                if first_pass3257:
                    first_pass3257 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None and (tt.end_char < t.end_char))):
                    break
                rr = tt.get_referent()
                if (rr is None):
                    continue
                if ((isinstance(rr, OrganizationReferent)) or (isinstance(rr, BankDataReferent)) or (isinstance(rr, UriReferent))):
                    r = (None)
                    break
        if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent)) or (isinstance(r, InstrumentParticipantReferent))):
            if (t != t0):
                break
            if (isinstance(r, InstrumentParticipantReferent)):
                pass
            res1 = InstrToken._new1511(t, t, ILTypes.PERSON, t)
            return InstrToken.__correct_person(res1)
        if (isinstance(r, OrganizationReferent)):
            if (t != t0):
                break
            return InstrToken._new1511(t, t, ILTypes.ORGANIZATION, t)
        if (isinstance(r, DecreePartReferent)):
            dpr = Utils.asObjectOrNull(r, DecreePartReferent)
            if (dpr.appendix is not None):
                if (t.is_newline_before or is_start_of_line):
                    if (t.is_newline_after or t.whitespaces_before_count > 30):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
                    # Accept when the rest of the line consists of noun phrases only.
                    ok = True
                    tt = t.next0_
                    first_pass3258 = True
                    while True:
                        if first_pass3258:
                            first_pass3258 = False
                        else:
                            tt = tt.next0_
                        if (not (tt is not None)):
                            break
                        if (tt.is_newline_before):
                            break
                        npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                        if (npt is not None):
                            tt = npt.end_token
                            continue
                        ok = False
                        break
                    if (ok):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
        if ((isinstance(r, DecreeReferent)) and r.kind == DecreeKind.PUBLISHER and t == t0):
            res1 = InstrToken._new1512(t, t, ILTypes.APPROVED)
            tt = t.next0_
            first_pass3259 = True
            while True:
                if first_pass3259:
                    first_pass3259 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if (tt.is_char_of(",;")):
                    continue
                if ((isinstance(tt.get_referent(), DecreeReferent)) and tt.get_referent().kind == DecreeKind.PUBLISHER):
                    # Fix: the original assigned ``t`` here, which never changes
                    # inside this loop, so following publisher references were
                    # never included in the span.
                    res1.end_token = tt
                else:
                    break
            return res1
        if (t.is_value("ЗА", None) and t.next0_ is not None and t.is_newline_before):
            rr = t.next0_.get_referent()
            if ((isinstance(rr, PersonReferent)) or (isinstance(rr, PersonPropertyReferent)) or (isinstance(rr, InstrumentParticipantReferent))):
                if (t != t0):
                    break
                res1 = InstrToken._new1511(t, t.next0_, ILTypes.PERSON, t.next0_)
                t = t.next0_.next0_
                if ((isinstance(rr, InstrumentParticipantReferent)) and t is not None):
                    r = t.get_referent()
                    if ((r) is not None):
                        if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent))):
                            res1.end_token = t
                            res1.ref = (t)
                return res1
        # Directive words; the ``break`` statements below leave only this inner loop.
        ii = 0
        while ii < len(InstrToken._m_directives):
            if (t.is_value(InstrToken._m_directives[ii], None)):
                if (t.next0_ is not None and t.next0_.is_value("СЛЕДУЮЩЕЕ", "НАСТУПНЕ")):
                    if (t != t0):
                        break
                    t11 = t.next0_
                    ok = False
                    if (t11.next0_ is not None and t11.next0_.is_char_of(":.") and t11.next0_.is_newline_after):
                        ok = True
                        t11 = t11.next0_
                    if (ok):
                        return InstrToken._new1515(t, t11, ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                if (t.is_newline_after or ((t.next0_ is not None and t.next0_.is_char(':') and t.next0_.is_newline_after))):
                    if (t != t0):
                        break
                    if (not t.is_newline_before):
                        if ((InstrToken._m_directives_norm[ii] != "ПРИКАЗ" and InstrToken._m_directives_norm[ii] != "ПОСТАНОВЛЕНИЕ" and InstrToken._m_directives_norm[ii] != "НАКАЗ") and InstrToken._m_directives_norm[ii] != "ПОСТАНОВУ"):
                            break
                    return InstrToken._new1515(t, (t if t.is_newline_after else t.next0_), ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                break
            ii += 1
        if (t.is_newline_before and t.chars.is_letter and t.length_char == 1):
            # A directive word typed letter by letter.
            for d in InstrToken._m_directives:
                t11 = MiscHelper.try_attach_word_by_letters(d, t, True)
                if (t11 is not None):
                    if (t11.next0_ is not None and t11.next0_.is_char(':')):
                        t11 = t11.next0_
                    return InstrToken._new1512(t, t11, ILTypes.DIRECTIVE)
        tte = (t.begin_token if isinstance(t, MetaToken) else t)
        term = (tte.term if isinstance(tte, TextToken) else None)
        if (is_start_of_line and not tte.chars.is_all_lower and t == t0):
            npt = NounPhraseHelper.try_parse(tte, NounPhraseParseAttr.NO, 0, None)
            if (npt is not None and ((term == "ПРИЛОЖЕНИЯ" or term == "ДОДАТКИ"))):
                # Extra condition disabled in the source: if (tte.Next != null && tte.Next.IsChar(':'))
                npt = (None)
            if (npt is not None and npt.morph.case_.is_nominative and (isinstance(npt.end_token, TextToken))):
                term1 = npt.end_token.term
                if (((term1 == "ПРИЛОЖЕНИЕ" or term1 == "ДОДАТОК" or term1 == "МНЕНИЕ") or term1 == "ДУМКА" or term1 == "АКТ") or term1 == "ФОРМА" or term == "ЗАЯВКА"):
                    # Step over an optional number / single letter after the heading word.
                    tt1 = npt.end_token.next0_
                    dt1 = DecreeToken.try_attach(tt1, None, False)
                    if (dt1 is not None and dt1.typ == DecreeToken.ItemType.NUMBER):
                        tt1 = dt1.end_token.next0_
                    elif (isinstance(tt1, NumberToken)):
                        tt1 = tt1.next0_
                    elif ((isinstance(tt1, TextToken)) and tt1.length_char == 1 and tt1.chars.is_letter):
                        tt1 = tt1.next0_
                    ok = True
                    if (tt1 is None):
                        ok = False
                    elif (tt1.is_value("В", "У")):
                        ok = False
                    elif (tt1.is_value("К", None) and tt1.is_newline_before):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, term1)
                    elif (not tt1.is_newline_before and InstrToken._check_entered(tt1) is not None):
                        ok = False
                    elif (tt1 == t.next0_ and ((tt1.is_char(':') or ((tt1.is_value("НА", None) and term1 != "ЗАЯВКА"))))):
                        ok = False
                    if (ok):
                        br = BracketHelper.try_parse(tt1, BracketParseAttr.NO, 100)
                        if (br is not None):
                            tt1 = br.end_token.next0_
                            if (br.end_token.next0_ is None or not br.end_token.is_newline_after or br.end_token.next0_.is_char_of(";,")):
                                ok = False
                            if (tt1 is not None and tt1.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                ok = False
                    if (prev is not None and prev.typ == ILTypes.APPENDIX):
                        ok = False
                    if (ok):
                        # Reject when a table control character (other than 0x1F, or 0x1E directly before) precedes.
                        cou = 0
                        ttt = tte.previous
                        while ttt is not None and (cou < 300):
                            if (ttt.is_table_control_char):
                                if (not ttt.is_char(chr(0x1F))):
                                    if (ttt == tte.previous and ttt.is_char(chr(0x1E))):
                                        pass
                                    else:
                                        ok = False
                                    break
                            ttt = ttt.previous; cou += 1
                    if (ok):
                        it1 = InstrToken1.parse(t, True, None, 0, None, False, 0, False, False)
                        if (it1 is not None):
                            if (it1.has_verb):
                                ok = False
                    if (ok and t.previous is not None):
                        # The nearest meaningful preceding token must not be ';' or ':'.
                        ttp = t.previous
                        first_pass3260 = True
                        while True:
                            if first_pass3260:
                                first_pass3260 = False
                            else:
                                ttp = ttp.previous
                            if (not (ttp is not None)):
                                break
                            if (ttp.is_table_control_char and not ttp.is_char(chr(0x1F))):
                                continue
                            if (BracketHelper.is_bracket(ttp, False) and not BracketHelper.can_be_end_of_sequence(ttp, False, None, False)):
                                continue
                            if (ttp.is_char_of(";:")):
                                ok = False
                            break
                    if ((ok and t.previous is not None and (t.newlines_before_count < 3)) and not t.is_newline_after):
                        # Reject when one of the two preceding lines already starts with the appendix word.
                        lines = 0
                        ttp = t.previous
                        first_pass3261 = True
                        while True:
                            if first_pass3261:
                                first_pass3261 = False
                            else:
                                ttp = ttp.previous
                            if (not (ttp is not None)):
                                break
                            if (not ttp.is_newline_before):
                                continue
                            while ttp is not None and (ttp.end_char < t.begin_char):
                                if (isinstance(ttp, NumberToken)):
                                    pass
                                elif ((isinstance(ttp, TextToken)) and ttp.length_char > 1):
                                    if (ttp.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                        ok = False
                                    break
                                else:
                                    break
                                ttp = ttp.next0_
                            lines += 1
                            if (lines > 1):
                                break
                    if (ok and ((term1 != "ПРИЛОЖЕНИЕ" and term1 != "ДОДАТОК" and term1 != "МНЕНИЕ"))):
                        if (t.newlines_before_count < 3):
                            ok = False
                    if (ok):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, term1)
        app = False
        if ((((term == "ОСОБОЕ" or term == "ОСОБЛИВЕ")) and t.next0_ is not None and t.next0_.is_value("МНЕНИЕ", "ДУМКА")) and t == t0 and is_start_of_line):
            app = True
        if ((((term == "ДОПОЛНИТЕЛЬНОЕ" or term == "ДОДАТКОВА")) and t.next0_ is not None and t.next0_.is_value("СОГЛАШЕНИЕ", "УГОДА")) and t == t0 and is_start_of_line):
            app = True
        if (app):
            # A verb on the same line means running text, not a heading.
            tt = t.next0_
            while tt is not None:
                if (tt.is_newline_before):
                    break
                elif (tt.get_morph_class_in_dictionary() == MorphClass.VERB):
                    app = False
                    break
                tt = tt.next0_
            if (app):
                return InstrToken._new1512(t, t.next0_, ILTypes.APPENDIX)
        if (not t.chars.is_all_lower and t == t0):
            tt = InstrToken._check_approved(t)
            if (tt is not None):
                if (tt.next0_ is not None and (isinstance(tt.next0_.get_referent(), DecreeReferent))):
                    return InstrToken._new1511(t, tt, ILTypes.APPROVED, tt.next0_.get_referent())
                dt1 = DecreeToken.try_attach(tt.next0_, None, False)
                if (dt1 is not None and dt1.typ == DecreeToken.ItemType.TYP):
                    return InstrToken._new1512(t, tt, ILTypes.APPROVED)
        t1 = t
        is_start_of_line = False
    if (t1 is None):
        return None
    # Nothing specific was recognized: return the scanned span as an undefined line.
    res = InstrToken._new1512(t00, t1, ILTypes.UNDEFINED)
    res.no_words = True
    t = t0
    first_pass3262 = True
    while True:
        if first_pass3262:
            first_pass3262 = False
        else:
            t = t.next0_
        if (not (t is not None and t.end_char <= t1.end_char)):
            break
        if (not (isinstance(t, TextToken))):
            if (isinstance(t, ReferentToken)):
                res.no_words = False
            continue
        if (not t.chars.is_letter):
            continue
        res.no_words = False
        if (t.is_pure_verb):
            res.has_verb = True
    if (t0.is_value("ВОПРОС", "ПИТАННЯ") and t0.next0_ is not None and t0.next0_.is_char_of(":.")):
        res.typ = ILTypes.QUESTION
    return res
def try_parse(t : 'Token', loc_onto : 'IntOntologyCollection') -> 'NamedItemToken':
    """Try to read one named-entity item at token ``t``.

    The alternatives are tried in this order: a referent token (person,
    person property, geo, organization); an entry of the type ontology; an
    entry of the name ontology; a compass-direction adjective followed by a
    geo referent or a location item; a capitalized adjective + typed item
    noun phrase; a bracketed name; a single capitalized word (optionally
    hyphenated with the next word).

    ``loc_onto`` is only forwarded to the recursive calls.

    Returns the item, or None when ``t`` is None or nothing matches.
    """
    if t is None:
        return None

    if isinstance(t, ReferentToken):
        ref = t.get_referent()
        if ((ref.type_name == "PERSON" or ref.type_name == "PERSONPROPERTY" or (isinstance(ref, GeoReferent)))
                or ref.type_name == "ORGANIZATION"):
            return NamedItemToken._new1758(t, t, ref, t.morph)
        return None

    type_hit = NamedItemToken.__m_types.try_parse(t, TerminParseAttr.NO)
    name_hit = NamedItemToken.__m_names.try_parse(t, TerminParseAttr.NO)

    if type_hit is not None:
        if not isinstance(t, TextToken):
            return None
        item = NamedItemToken._new1759(type_hit.begin_token, type_hit.end_token, type_hit.morph, type_hit.chars)
        item.kind = (Utils.valToEnum(type_hit.termin.tag, NamedEntityKind))
        item.type_value = type_hit.termin.canonic_text
        # The same span may also be a well-known name of the same kind.
        if ((name_hit is not None and name_hit.end_token == type_hit.end_token and not t.chars.is_all_lower)
                and (Utils.valToEnum(name_hit.termin.tag, NamedEntityKind)) == item.kind):
            item.name_value = name_hit.termin.canonic_text
            item.is_wellknown = True
        return item

    if name_hit is not None:
        if name_hit.begin_token.chars.is_all_lower:
            return None
        item = NamedItemToken._new1759(name_hit.begin_token, name_hit.end_token, name_hit.morph, name_hit.chars)
        item.kind = (Utils.valToEnum(name_hit.termin.tag, NamedEntityKind))
        item.name_value = name_hit.termin.canonic_text
        # Well-known only when the word stands apart from its neighbours
        # (trailing punctuation followed by whitespace is tolerated).
        standalone = True
        if not t.is_whitespace_before and t.previous is not None:
            standalone = False
        elif not t.is_whitespace_after and t.next0_ is not None:
            if not (t.next0_.is_char_of(",.;!?") and t.next0_.is_whitespace_after):
                standalone = False
        if standalone:
            item.is_wellknown = True
            item.type_value = (Utils.asObjectOrNull(name_hit.termin.tag2, str))
        return item

    compass = MiscLocationHelper.try_attach_nord_west(t)
    if compass is not None:
        if compass.morph.class0_.is_noun:
            if not compass.end_token.is_value("ВОСТОК", None):
                return None
            if compass.begin_token == compass.end_token:
                return None
            loc = NamedItemToken._new1761(t, compass.end_token, compass.morph)
            loc.kind = NamedEntityKind.LOCATION
            loc.name_value = MiscHelper.get_text_value(t, compass.end_token, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
            loc.is_wellknown = True
            return loc
        if compass.whitespaces_after_count > 2:
            return None
        if ((isinstance(compass.end_token.next0_, ReferentToken))
                and (isinstance(compass.end_token.next0_.get_referent(), GeoReferent))):
            loc = NamedItemToken._new1761(t, compass.end_token.next0_, compass.end_token.next0_.morph)
            loc.kind = NamedEntityKind.LOCATION
            loc.name_value = MiscHelper.get_text_value(t, compass.end_token.next0_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
            loc.is_wellknown = True
            loc.ref = compass.end_token.next0_.get_referent()
            return loc
        item = NamedItemToken.try_parse(compass.end_token.next0_, loc_onto)
        if item is not None and item.kind == NamedEntityKind.LOCATION:
            adj_text = compass.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, item.morph.gender, False)
            if adj_text is not None:
                if item.name_value is None:
                    item.name_value = adj_text.upper()
                else:
                    item.name_value = "{0} {1}".format(adj_text.upper(), item.name_value)
                    item.type_value = (None)
                item.begin_token = t
                item.chars = t.chars
                item.is_wellknown = True
                return item

    if t.chars.is_capital_upper and not MiscHelper.can_be_start_of_sentence(t):
        phrase = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
        if phrase is not None and len(phrase.adjectives) > 0:
            typed = NamedItemToken.try_parse(phrase.noun.begin_token, loc_onto)
            if typed is not None and typed.end_token == phrase.end_token and typed.type_value is not None:
                typed.begin_token = t
                # The adjectives, agreed with the item's gender, form the name.
                buf = io.StringIO()
                for adjective in phrase.adjectives:
                    part = adjective.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, typed.morph.gender, False)
                    if buf.tell() > 0:
                        print(' ', end="", file=buf)
                    print(part, end="", file=buf)
                typed.name_value = Utils.toStringStringIO(buf)
                typed.chars = t.chars
                if typed.kind == NamedEntityKind.LOCATION:
                    typed.is_wellknown = True
                return typed

    if ((BracketHelper.is_bracket(t, True) and t.next0_ is not None and t.next0_.chars.is_letter)
            and not t.next0_.chars.is_all_lower):
        seq = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if seq is not None:
            item = NamedItemToken(t, seq.end_token)
            item.is_in_bracket = True
            item.name_value = MiscHelper.get_text_value(t, seq.end_token, GetTextAttr.NO)
            inner_name = NamedItemToken.__m_names.try_parse(t.next0_, TerminParseAttr.NO)
            if inner_name is not None and inner_name.end_token == seq.end_token.previous:
                item.kind = (Utils.valToEnum(inner_name.termin.tag, NamedEntityKind))
                item.is_wellknown = True
                item.name_value = inner_name.termin.canonic_text
            return item

    if (((isinstance(t, TextToken)) and t.chars.is_letter and not t.chars.is_all_lower)
            and t.length_char > 2):
        item = NamedItemToken._new1761(t, t, t.morph)
        word = t.term
        if word.endswith("О") or word.endswith("И") or word.endswith("Ы"):
            # Words with these endings are kept as written.
            item.name_value = word
        else:
            item.name_value = t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        item.chars = t.chars
        # "Word-Word" without spaces and in the same alphabet forms one name.
        if (((not t.is_whitespace_after and t.next0_ is not None and t.next0_.is_hiphen)
             and (isinstance(t.next0_.next0_, TextToken)) and not t.next0_.next0_.is_whitespace_after)
                and t.chars.is_cyrillic_letter == t.next0_.next0_.chars.is_cyrillic_letter):
            item.end_token = t.next0_.next0_
            t = item.end_token
            item.name_value = "{0}-{1}".format(
                item.name_value,
                t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
        return item

    return None
def __analize_list_items(chi : typing.List['FragToken'], ind : int) -> int:
    """Break the fragment ``chi[ind]`` into list-head / list-item / content children.

    The fragment is cut into lines with ``ListHelper.LineToken.parse_list``;
    several heuristic passes then decide which lines really are list items,
    and one child ``FragToken`` per remaining line is appended to
    ``chi[ind].children``.

    A fragment that carries a multi-line change value is handled separately:
    a citation child is attached to it and the cited text is parsed on its own.

    Args:
        chi: sibling fragments; only ``chi[ind]`` receives new children, but
            following CONTENT siblings may be read to continue the same list.
        ind: index in ``chi`` of the fragment to analyse.

    Returns:
        -1 when nothing was produced (index out of range, unsupported fragment
        kind, fewer than two lines, or fewer than two list items).
        Otherwise 1, or ``(j - ind) + 1`` where ``j`` is the last following
        CONTENT sibling whose lines were folded into the list - presumably the
        number of ``chi`` entries the caller should skip (confirm against caller).
    """
    if (ind >= len(chi)):
        return -1
    res = chi[ind]
    ki = res.kind
    # Only these fragment kinds are analysed; anything else is rejected.
    if (((ki == InstrumentKind.CHAPTER or ki == InstrumentKind.CLAUSE or ki == InstrumentKind.CONTENT) or ki == InstrumentKind.ITEM or ki == InstrumentKind.SUBITEM) or ki == InstrumentKind.CLAUSEPART or ki == InstrumentKind.INDENTION):
        pass
    else:
        return -1
    # --- Special case: the fragment holds a multi-line change (cited new text) ---
    if (res.has_changes and res.multiline_changes_value is not None):
        ci = res.multiline_changes_value
        # Child fragment created with InstrumentKind.CITATION over the changed text.
        cit = FragToken._new1340(ci.begin_token, ci.end_token, InstrumentKind.CITATION)
        res.children.append(cit)
        # Widen the citation over an adjacent opening bracket ...
        if (BracketHelper.is_bracket(cit.begin_token.previous, True)):
            cit.begin_token = cit.begin_token.previous
        # ... and over a closing bracket plus a ';' or '.' right after it.
        if (BracketHelper.is_bracket(cit.end_token.next0_, True)):
            cit.end_token = cit.end_token.next0_
            if (cit.end_token.next0_ is not None and cit.end_token.next0_.is_char_of(";.")):
                cit.end_token = cit.end_token.next0_
        res.fill_by_content_children()
        # No-op branch kept from the original source.
        if (res.children[0].has_changes):
            pass
        # Work out which kind of structural part the cited text represents.
        cit_kind = InstrumentKind.UNDEFINED
        if (isinstance(ci.tag, DecreeChangeReferent)):
            dcr = Utils.asObjectOrNull(ci.tag, DecreeChangeReferent)
            if (dcr.value is not None and len(dcr.value.new_items) > 0):
                # Use the first word (text before the first space) of the first new item.
                mnem = dcr.value.new_items[0]
                i = 0
                i = mnem.find(' ')
                if (((i)) > 0):
                    mnem = mnem[0:0+i]
                cit_kind = PartToken._get_instr_kind_by_typ(PartToken._get_type_by_attr_name(mnem))
            elif (len(dcr.owners) > 0 and (isinstance(dcr.owners[0], DecreePartReferent)) and dcr.kind == DecreeChangeKind.NEW):
                pat = Utils.asObjectOrNull(dcr.owners[0], DecreePartReferent)
                # Despite its name, min0_ tracks the largest non-zero rank seen so far;
                # the kind of the slot with that rank wins.
                min0_ = 0
                for s in pat.slots:
                    ty = PartToken._get_type_by_attr_name(s.type_name)
                    if (ty == PartToken.ItemType.UNDEFINED):
                        continue
                    l_ = PartToken._get_rank(ty)
                    if (l_ == 0):
                        continue
                    if (l_ > min0_ or min0_ == 0):
                        min0_ = l_
                        cit_kind = PartToken._get_instr_kind_by_typ(ty)
        sub = None
        if (cit_kind != InstrumentKind.UNDEFINED and cit_kind != InstrumentKind.APPENDIX):
            # Known part kind: analyse the cited span as content of that kind.
            sub = FragToken(ci.begin_token, ci.end_token)
            wr = ContentAnalyzeWhapper()
            wr.analyze(sub, None, True, cit_kind)
            sub.kind = InstrumentKind.CONTENT
        else:
            # Unknown kind or appendix: parse the cited span via create_document.
            sub = FragToken.create_document(ci.begin_token, ci.end_char, cit_kind)
        if (sub is None or len(sub.children) == 0):
            pass
        elif ((sub.kind == InstrumentKind.CONTENT and len(sub.children) > 0 and sub.children[0].begin_token == sub.begin_token) and sub.children[len(sub.children) - 1].end_token == sub.end_token):
            # The children cover the CONTENT wrapper from its first to its last token,
            # so attach them to the citation directly instead of the wrapper.
            cit.children.extend(sub.children)
        else:
            cit.children.append(sub)
        return 1
    # --- Regular case: split the fragment into lines and look for a list ---
    end_char = res.end_char
    if (res._itok is None):
        res._itok = InstrToken1.parse(res.begin_token, True, None, 0, None, False, res.end_char, False, False)
    lines = ListHelper.LineToken.parse_list(res.begin_token, end_char, None)
    if (lines is None or (len(lines) < 1)):
        return -1
    ret = 1
    if (res.kind == InstrumentKind.CONTENT):
        # A list may continue in the following CONTENT siblings; EDITIONS and COMMENT
        # siblings are stepped over, any other kind ends the scan.
        j = ind + 1
        while j < len(chi):
            if (chi[j].kind == InstrumentKind.CONTENT):
                # The last line collected so far is passed as the third argument.
                lines2 = ListHelper.LineToken.parse_list(chi[j].begin_token, chi[j].end_char, lines[len(lines) - 1])
                if (lines2 is None or (len(lines2) < 1)):
                    break
                if (not lines2[0].is_list_item):
                    # Accept a non-item first line only when it ends with ':', does not
                    # start with a capital letter, and the next line is a list item.
                    if ((len(lines2) > 1 and lines2[1].is_list_item and lines2[0].end_token.is_char_of(":")) and not lines2[0].begin_token.chars.is_capital_upper):
                        lines2[0].is_list_item = True
                    else:
                        break
                lines.extend(lines2)
                ret = ((j - ind) + 1)
            elif (chi[j].kind != InstrumentKind.EDITIONS and chi[j].kind != InstrumentKind.COMMENT):
                break
            j += 1
    if (len(lines) < 2):
        return -1
    # Two leading "items" whose numbering does not start at 1 and that have no third
    # item after them are not treated as a list.
    if ((len(lines) > 1 and lines[0].is_list_item and lines[1].is_list_item) and lines[0].number != 1):
        if (len(lines) == 2 or not lines[2].is_list_item):
            lines[1].is_list_item = False
            lines[0].is_list_item = lines[1].is_list_item
    # Validate every run of consecutive list items (emulated C-style for loop:
    # 'continue' still performs the i += 1 step at the top).
    i = 0
    first_pass3276 = True
    while True:
        if first_pass3276: first_pass3276 = False
        else: i += 1
        if (not (i < len(lines))): break
        if (lines[i].is_list_item):
            # Only the first item of a run is examined.
            if (i > 0 and lines[i - 1].is_list_item):
                continue
            if (((i + 1) < len(lines)) and lines[i + 1].is_list_item):
                pass
            else:
                # A lone item with no item after it is demoted.
                lines[i].is_list_item = False
                continue
            j = 0
            new_line = False
            j = (i + 1)
            while j < len(lines):
                if (not lines[j].is_list_item):
                    break
                elif (lines[j].is_newline_before):
                    new_line = True
                j += 1
            # Keep the run if some later item has is_newline_before set ...
            if (new_line):
                continue
            # ... or if the line in front of the run ends with ':'.
            if (i > 0 and lines[i - 1].end_token.is_char(':')):
                continue
            # Otherwise demote the whole run.
            j = i
            while j < len(lines):
                if (not lines[j].is_list_item):
                    break
                else:
                    lines[j].is_list_item = False
                j += 1
    if (len(lines) > 2):
        last = lines[len(lines) - 1]
        last2 = lines[len(lines) - 2]
        # A final non-item line ending with '.' after an item ending with ';' is taken
        # as the last item if it is shorter than twice that item or starts lower-case.
        if ((not last.is_list_item and last.end_token.is_char('.') and last2.is_list_item) and last2.end_token.is_char(';')):
            if ((last.length_char < (last2.length_char * 2)) or last.begin_token.chars.is_all_lower):
                last.is_list_item = True
    # Merge neighbouring non-item lines into one, unless the second one ends with ':'
    # and is directly followed by an item (it is then kept separate as a list head).
    i = 0
    while i < (len(lines) - 1):
        if (not lines[i].is_list_item and not lines[i + 1].is_list_item):
            if (((i + 2) < len(lines)) and lines[i + 2].is_list_item and lines[i + 1].end_token.is_char(':')):
                pass
            else:
                lines[i].end_token = lines[i + 1].end_token
                del lines[i + 1]
                # Cancels the i += 1 below so the merged line is compared again.
                i -= 1
        i += 1
    # Numbered items mixed with unnumbered ones: fold each unnumbered line into the
    # preceding numbered item as a sub-entry stored in that item's 'tag' list.
    i = 0
    while i < (len(lines) - 1):
        if (lines[i].is_list_item):
            if (lines[i].number == 1):
                ok = True
                num = 1
                nonum = 0
                j = i + 1
                # Everything up to the end must be a list item, and the numbered ones
                # must continue 2, 3, ... without gaps.
                while j < len(lines):
                    if (not lines[j].is_list_item):
                        ok = False
                        break
                    elif (lines[j].number > 0):
                        num += 1
                        if (lines[j].number != num):
                            ok = False
                            break
                    else:
                        nonum += 1
                    j += 1
                # Nothing to fold (or an inconsistent list): stop this pass entirely.
                if (not ok or nonum == 0 or (num < 2)):
                    break
                lt = lines[i]
                j = i + 1
                while j < len(lines):
                    if (lines[j].number > 0):
                        lt = lines[j]
                    else:
                        chli = Utils.asObjectOrNull(lt.tag, list)
                        if (chli is None):
                            chli = list()
                            lt.tag = (chli)
                        # The numbered item grows to cover the folded line.
                        lt.end_token = lines[j].end_token
                        chli.append(lines[j])
                        del lines[j]
                        # Cancels the j += 1 below after the deletion.
                        j -= 1
                    j += 1
        i += 1
    # At least two list items must survive the passes above.
    cou = 0
    for li in lines:
        if (li.is_list_item):
            cou += 1
    if (cou < 2):
        return -1
    # Per run of items: normalise numbering and split a head glued to the first item.
    i = 0
    first_pass3277 = True
    while True:
        if first_pass3277: first_pass3277 = False
        else: i += 1
        if (not (i < len(lines))): break
        if (lines[i].is_list_item):
            i0 = i
            ok = True
            cou = 1
            # Walk the run; numbers are expected to be exactly 1, 2, 3, ...
            while i < len(lines):
                if (not lines[i].is_list_item):
                    break
                elif (lines[i].number != cou):
                    ok = False
                i += 1; cou += 1
            if (not ok):
                # Numbering is not a clean sequence: reset the numbers of the run to 0.
                i = i0
                while i < len(lines):
                    if (not lines[i].is_list_item):
                        break
                    else:
                        lines[i].number = 0
                    i += 1
            # With three or more items: the second and third start with the same
            # token text, but the first starts with a different one.
            if (cou > 3 and lines[i0].begin_token.get_source_text() != lines[i0 + 1].begin_token.get_source_text() and lines[i0 + 1].begin_token.get_source_text() == lines[i0 + 2].begin_token.get_source_text()):
                pref = lines[i0 + 1].begin_token.get_source_text()
                ok = True
                # Every remaining item of the run must start with that same text.
                j = i0 + 2
                while j < i:
                    if (pref != lines[j].begin_token.get_source_text()):
                        ok = False
                        break
                    j += 1
                if (not ok):
                    continue
                tt = None
                ok = False
                # Search backwards inside the first item for a token with that text.
                tt = lines[i0].end_token.previous
                while tt is not None and tt != lines[i0].begin_token:
                    if (tt.get_source_text() == pref):
                        ok = True
                        break
                    tt = tt.previous
                if (ok):
                    # Split: the text before the token becomes a separate line in front
                    # of the run, and the first item now starts at the token.
                    li0 = ListHelper.LineToken(lines[i0].begin_token, tt.previous)
                    lines[i0].begin_token = tt
                    lines.insert(i0, li0)
                    # Compensate for the inserted element.
                    i += 1
    # Materialise the lines as child fragments of the analysed fragment.
    for li in lines:
        li.correct_begin_token()
        ch = FragToken._new1357(li.begin_token, li.end_token, (InstrumentKind.LISTITEM if li.is_list_item else InstrumentKind.CONTENT), li.number)
        # A plain-content line ending with ':' is marked as a list head.
        if (ch.kind == InstrumentKind.CONTENT and ch.end_token.is_char(':')):
            ch.kind = InstrumentKind.LISTHEAD
        res.children.append(ch)
        chli = Utils.asObjectOrNull(li.tag, list)
        if (chli is not None):
            # Lines folded into this item earlier become nested children.
            for lt in chli:
                ch.children.append(FragToken._new1340(lt.begin_token, lt.end_token, InstrumentKind.LISTITEM))
            # Text of the item that precedes its first nested child gets its own
            # leading child created with InstrumentKind.CONTENT.
            if (ch.begin_char < ch.children[0].begin_char):
                ch.children.insert(0, FragToken._new1340(ch.begin_token, ch.children[0].begin_token.previous, InstrumentKind.CONTENT))
    return ret