示例#1
0
 def try_parse_author(
     t: 'Token',
     prev_pers_template: 'FioTemplateType' = FioTemplateType.UNDEFINED
 ) -> 'BookLinkToken':
     """Try to read an author entry of a bibliographic link starting at ``t``.

     Three shapes are tried, in this order:

     1. a person recognised by ``PersonItemToken.try_parse_person``;
     2. an author wrapped in square brackets (``[`` author ``]``);
     3. an "and others" marker: "И" / "ET" followed by "ДРУГИЕ" / "ДР" /
        "AL", with an optional trailing dot.

     Args:
         t: token to start from; ``None`` yields ``None``.
         prev_pers_template: template value forwarded to
             ``PersonItemToken.try_parse_person``.

     Returns:
         A ``BookLinkToken`` of type ``PERSON`` or ``ANDOTHERS``, or ``None``
         when nothing matches. ``None`` is also returned when the person span
         contains a ``PersonPropertyReferent`` token that is the first token
         of the span or does not start with a capitalised word.
     """
     if t is None:
         return None

     person = PersonItemToken.try_parse_person(t, prev_pers_template)
     if person is not None:
         if person.data is None:
             last = t if person == t else person.end_token
             link = BookLinkToken._new326(t, last, BookLinkTyp.PERSON,
                                          person.referent)
         else:
             link = BookLinkToken._new327(t, person.end_token,
                                          BookLinkTyp.PERSON, person)
         link.person_template = Utils.valToEnum(person.misc_attrs,
                                                FioTemplateType)
         # Walk the tokens covered by the person looking for an embedded
         # person-property token.
         cur = person.begin_token
         while cur is not None and cur.end_char <= person.end_char:
             if isinstance(cur.get_referent(), PersonPropertyReferent):
                 prop_tok = Utils.asObjectOrNull(cur, ReferentToken)
                 if not (prop_tok.begin_token.chars.is_capital_upper
                         and cur != person.begin_token):
                     return None
                 # Its text (original register kept) is stored as the
                 # start of the name, and the scan stops here.
                 link.start_of_name = (
                     MiscHelper.get_text_value_of_meta_token(
                         prop_tok, GetTextAttr.KEEPREGISTER))
                 break
             cur = cur.next0_
         return link

     if t.is_char('['):
         inner = BookLinkToken.try_parse_author(t.next0_,
                                                FioTemplateType.UNDEFINED)
         if (inner is not None and inner.end_token.next0_ is not None
                 and inner.end_token.next0_.is_char(']')):
             # Stretch the inner result so that it covers both brackets.
             inner.begin_token = t
             inner.end_token = inner.end_token.next0_
             return inner

     follower = t.next0_
     if ((t.is_value("И", None) or t.is_value("ET", None))
             and follower is not None):
         if any(follower.is_value(word, None)
                for word in ("ДРУГИЕ", "ДР", "AL")):
             res = BookLinkToken._new328(t, follower, BookLinkTyp.ANDOTHERS)
             after = follower.next0_
             if after is not None and after.is_char('.'):
                 res.end_token = res.end_token.next0_
             return res
     return None
示例#2
0
 def createNickname(pr : 'PersonReferent', t : 'Token') -> 'Token':
     """ Extract a nickname.

     Leading hyphens, commas, the characters ".:;", prepositions and an
     opening "(" are skipped. At least one keyword (ПРОЗВИЩЕ, КЛИЧКА,
     ПСЕВДОНИМ, ПСЕВДО, ПОЗЫВНОЙ) must be met before the nickname itself.

     Args:
         pr(PersonReferent): person that receives the nickname slot(s)
         t(Token): start token

     Returns:
         Token: if not None, the last token of the nickname; the nickname
         itself is written into pr
     """
     keyword_seen = False
     in_parens = False
     while t is not None:
         if (t.is_hiphen or t.is_comma or t.isCharOf(".:;")):
             pass
         elif (t.morph.class0_.is_preposition):
             pass
         elif (t.isChar('(')):
             in_parens = True
         elif ((t.isValue("ПРОЗВИЩЕ", "ПРІЗВИСЬКО") or t.isValue("КЛИЧКА", None) or t.isValue("ПСЕВДОНИМ", "ПСЕВДОНІМ")) or t.isValue("ПСЕВДО", None) or t.isValue("ПОЗЫВНОЙ", "ПОЗИВНИЙ")):
             keyword_seen = True
         else:
             break
         t = t.next0_
     if (t is None or not keyword_seen):
         return None

     if (BracketHelper.isBracket(t, True)):
         # Bracketed form: the text inside the bracket pair is the nickname.
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if (br is None):
             return None
         nick = MiscHelper.getTextValue(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
         if (nick is None):
             return None
         pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
         t = br.end_token
         # Further bracketed nicknames may follow, separated by comma/and.
         cur = t.next0_
         while cur is not None:
             if (not cur.is_comma_and):
                 if (not BracketHelper.isBracket(cur, True)):
                     break
                 br = BracketHelper.tryParse(cur, BracketParseAttr.NO, 100)
                 if (br is None):
                     break
                 nick = MiscHelper.getTextValue(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
                 if (nick is not None):
                     pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                 cur = br.end_token
                 t = cur
             cur = cur.next0_
         if (in_parens and t.next0_ is not None and t.next0_.isChar(')')):
             t = t.next0_
         return t

     # Plain form: one or two person items make up the nickname.
     items = PersonItemToken.tryAttachList(t, None, PersonItemToken.ParseAttr.NO, 10)
     if (items is None or len(items) not in (1, 2)):
         return None
     nick = MiscHelper.getTextValue(items[0].begin_token, items[-1].end_token, GetTextAttr.NO)
     if (nick is None):
         return None
     pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
     t = items[-1].end_token
     if (in_parens and t.next0_ is not None and t.next0_.isChar(')')):
         t = t.next0_
     return t
示例#3
0
 def _createReferentToken(p : 'PersonReferent', begin : 'Token', end : 'Token', morph_ : 'MorphCollection', attrs : typing.List['PersonAttrToken'], ad : 'PersonAnalyzerData', for_attribute : bool, after_be_predicate : bool) -> 'ReferentToken':
     """Build the ReferentToken for person ``p`` over ``begin``..``end``,
     absorbing attributes found around the name.

     Steps, in the order they appear in the body:

     1. Attributes preceding the name (``attrs``) are written into ``p`` and
        may move ``begin`` to the left; without ``attrs`` a directly preceding
        "ИП" token is turned into a property.
     2. The morphology is rebuilt as singular, with gender filled from ``p``.
     3. If the name is preceded by "ИМЕНИ" / "ИМ" (or ``for_attribute`` was
        passed as true) the token is returned right away.
     4. For a person in a comma/and-separated chain of persons, the property
        heading the chain may be added to ``p``.
     5. The tokens after ``end`` are scanned for trailing attributes,
        nicknames and a bracketed surname; accepted ones move ``end`` right.
     6. The tokens after ``end`` are scanned for contacts, identity
        documents and organizations.

     Args:
         p: person referent being filled; ``None`` makes the function
             return ``None``.
         begin: first token of the person; may be moved left.
         end: last token of the person; may be moved right.
         morph_: morphology collection whose items are copied.
         attrs: attribute tokens found before the name, or ``None``.
         ad: analyzer data (``overflow_level`` and ``local_ontology`` are
             used), or ``None``.
         for_attribute: when true, steps 4-6 are skipped.
         after_be_predicate: not read anywhere in this function.

     Returns:
         The token created by ``ReferentToken._new2329`` or ``None``.
     """
     from pullenti.ner.person.internal.PersonIdentityToken import PersonIdentityToken
     if (p is None): 
         return None
     # --- Step 1: attributes standing before the name -------------------
     # has_prefix is set only by a BESTREGARDS attribute; it relaxes several
     # checks in the scans below.
     has_prefix = False
     if (attrs is not None): 
         for a in attrs: 
             if (a.typ == PersonAttrTerminType.BESTREGARDS): 
                 has_prefix = True
             else: 
                 # The resulting token must also cover the attribute.
                 if (a.begin_char < begin.begin_char): 
                     begin = a.begin_token
                 if (a.typ != PersonAttrTerminType.PREFIX): 
                     if (a.age is not None): 
                         p.addSlot(PersonReferent.ATTR_AGE, a.age, False, 0)
                     if (a.prop_ref is None): 
                         p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                     else: 
                         p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                 # PREFIX attributes contribute only their gender.
                 elif (a.gender == MorphGender.FEMINIE and not p.is_female): 
                     p.is_female = True
                 elif (a.gender == MorphGender.MASCULINE and not p.is_male): 
                     p.is_male = True
     # No attribute list at all: a closely preceding "ИП" text token is
     # stored as the property "индивидуальный предприниматель".
     elif ((isinstance(begin.previous, TextToken)) and (begin.whitespaces_before_count < 3)): 
         if ((begin.previous).term == "ИП"): 
             a = PersonAttrToken(begin.previous, begin.previous)
             a.prop_ref = PersonPropertyReferent()
             a.prop_ref.name = "индивидуальный предприниматель"
             p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
             begin = begin.previous
     # --- Step 2: rebuild the morphology ---------------------------------
     # Every item is copied, forced to singular, and an undefined gender is
     # taken from the person when the person's gender is unambiguous.
     m0 = MorphCollection()
     for it in morph_.items: 
         bi = MorphBaseInfo(it)
         bi.number = MorphNumber.SINGULAR
         if (bi.gender == MorphGender.UNDEFINED): 
             if (p.is_male and not p.is_female): 
                 bi.gender = MorphGender.MASCULINE
             if (not p.is_male and p.is_female): 
                 bi.gender = MorphGender.FEMINIE
         m0.addItem(bi)
     morph_ = m0
     # If the copy has no case, borrow case (and singular number) from the
     # first leading attribute.
     if ((attrs is not None and len(attrs) > 0 and not attrs[0].morph.case_.is_undefined) and morph_.case_.is_undefined): 
         morph_.case_ = attrs[0].morph.case_
         if (attrs[0].morph.number == MorphNumber.SINGULAR): 
             morph_.number = MorphNumber.SINGULAR
         if (p.is_male and not p.is_female): 
             morph_.gender = MorphGender.MASCULINE
         elif (p.is_female): 
             morph_.gender = MorphGender.FEMINIE
     # --- Step 3: "ИМЕНИ <person>" / "ИМ. <person>" -----------------------
     if (begin.previous is not None): 
         ttt = begin.previous
         if (ttt.isValue("ИМЕНИ", "ІМЕНІ")): 
             for_attribute = True
         else: 
             # Step over the dot of the abbreviation before testing "ИМ".
             if (ttt.isChar('.') and ttt.previous is not None): 
                 ttt = ttt.previous
             if (ttt.whitespaces_after_count < 3): 
                 if (ttt.isValue("ИМ", "ІМ")): 
                     for_attribute = True
     if (for_attribute): 
         return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
     # --- Step 4: person inside a "<person>, <person> and <person>" chain --
     if ((begin.previous is not None and begin.previous.is_comma_and and (isinstance(begin.previous.previous, ReferentToken))) and (isinstance(begin.previous.previous.getReferent(), PersonReferent))): 
         rt00 = Utils.asObjectOrNull(begin.previous.previous, ReferentToken)
         # Walk left to the first person token of the chain.
         ttt = rt00
         while ttt is not None: 
             if (ttt.previous is None or not ((isinstance(ttt.previous.previous, ReferentToken)))): 
                 break
             if (not ttt.previous.is_comma_and or not ((isinstance(ttt.previous.previous.getReferent(), PersonReferent)))): 
                 break
             rt00 = (Utils.asObjectOrNull(ttt.previous.previous, ReferentToken))
             ttt = (rt00)
         # If that first person starts with a property which is followed by
         # ':' or is plural, the property is added to this person as well.
         if (isinstance(rt00.begin_token.getReferent(), PersonPropertyReferent)): 
             ok = False
             if ((rt00.begin_token).end_token.next0_ is not None and (rt00.begin_token).end_token.next0_.isChar(':')): 
                 ok = True
             elif (rt00.begin_token.morph.number == MorphNumber.PLURAL): 
                 ok = True
             if (ok): 
                 p.addSlot(PersonReferent.ATTR_ATTR, rt00.begin_token.getReferent(), False, 0)
     # overflow_level is incremented here and decremented before the final
     # return; above 10 the tail scans are skipped altogether.
     # NOTE(review): presumably a guard against deep recursion through
     # PersonAnalyzer._tryAttachPerson - confirm.
     if (ad is not None): 
         if (ad.overflow_level > 10): 
             return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
         ad.overflow_level += 1
     # --- Step 5: attributes standing after the name ----------------------
     # attrs1 collects candidate trailing attributes; they are written into
     # p only after the filters that follow the loop.
     attrs1 = None
     has_position = False
     open_br = False
     t = end.next0_
     first_pass3095 = True
     while True:
         if first_pass3095: first_pass3095 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char): 
             break
         if (t.is_newline_before): 
             # A new line ends the scan unless what follows still looks
             # like an attribute of this person.
             if (t.newlines_before_count > 2): 
                 break
             if (attrs1 is not None and len(attrs1) > 0): 
                 break
             ml = MailLine.parse(t, 0)
             if (ml is not None and ml.typ == MailLine.Types.FROM): 
                 break
             if (t.chars.is_capital_upper): 
                 attr1 = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
                 ok1 = False
                 if (attr1 is not None): 
                     if (has_prefix or attr1.is_newline_after or ((attr1.end_token.next0_ is not None and attr1.end_token.next0_.is_table_control_char))): 
                         ok1 = True
                     else: 
                         # Accept a capitalised attribute that consists of
                         # more than one whitespace-separated token.
                         tt2 = t.next0_
                         while tt2 is not None and tt2.end_char <= attr1.end_char: 
                             if (tt2.is_whitespace_before): 
                                 ok1 = True
                             tt2 = tt2.next0_
                 else: 
                     ttt = PersonHelper.__correctTailAttributes(p, t)
                     if (ttt is not None and ttt != t): 
                         t = ttt
                         end = t
                         continue
                 if (not ok1): 
                     break
         if (t.is_hiphen or t.isCharOf("_>|")): 
             continue
         if (t.isValue("МОДЕЛЬ", None)): 
             break
         tt = PersonHelper.__correctTailAttributes(p, t)
         if (tt != t and tt is not None): 
             t = tt
             end = t
             continue
         # NOTE(review): is_be is never set to True in this function, so the
         # "not is_be" tests below always succeed.
         is_be = False
         # Skip one connecting token: "(", ",", a form of "to be", "and",
         # a hyphen, or (after a BESTREGARDS prefix) a dot.
         if (t.isChar('(') and t == end.next0_): 
             open_br = True
             t = t.next0_
             if (t is None): 
                 break
             # "(Surname)" right after the name: add it as one more identity.
             pit1 = PersonItemToken.tryAttach(t, None, PersonItemToken.ParseAttr.NO, None)
             if ((pit1 is not None and t.chars.is_capital_upper and pit1.end_token.next0_ is not None) and (isinstance(t, TextToken)) and pit1.end_token.next0_.isChar(')')): 
                 if (pit1.lastname is not None): 
                     inf = MorphBaseInfo._new2321(MorphCase.NOMINATIVE)
                     if (p.is_male): 
                         inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.MASCULINE), MorphGender)
                     if (p.is_female): 
                         inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.FEMINIE), MorphGender)
                     sur = PersonIdentityToken.createLastname(pit1, inf)
                     if (sur is not None): 
                         p._addFioIdentity(sur, None, None)
                         t = pit1.end_token.next0_
                         end = t
                         continue
         elif (t.is_comma): 
             t = t.next0_
             if ((isinstance(t, TextToken)) and (t).isValue("WHO", None)): 
                 continue
         elif ((isinstance(t, TextToken)) and (t).is_verb_be): 
             t = t.next0_
         elif (t.is_and and t.is_whitespace_after and not t.is_newline_after): 
             if (t == end.next0_): 
                 break
             t = t.next0_
         elif (t.is_hiphen and t == end.next0_): 
             t = t.next0_
         elif (t.isChar('.') and t == end.next0_ and has_prefix): 
             t = t.next0_
         # A nickname, if recognised, is stored in p by createNickname and
         # extends the person.
         ttt2 = PersonHelper.createNickname(p, t)
         if (ttt2 is not None): 
             end = ttt2
             t = end
             continue
         if (t is None): 
             break
         attr = None
         attr = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
         if (attr is None): 
             # Not an attribute. Inside brackets a GEO referent is skipped
             # once some attribute has already been collected.
             if ((t is not None and t.getReferent() is not None and t.getReferent().type_name == "GEO") and attrs1 is not None and open_br): 
                 continue
             # "(Word)" with a capitalised word: used as the last name when
             # the person has none yet.
             if ((t.chars.is_capital_upper and open_br and t.next0_ is not None) and t.next0_.isChar(')')): 
                 if (p.findSlot(PersonReferent.ATTR_LASTNAME, None, True) is None): 
                     p.addSlot(PersonReferent.ATTR_LASTNAME, t.getSourceText().upper(), False, 0)
                     t = t.next0_
                     end = t
             # A following singular "КОТОРЫЙ" reveals the person's gender.
             if (t is not None and t.isValue("КОТОРЫЙ", None) and t.morph.number == MorphNumber.SINGULAR): 
                 if (not p.is_female and t.morph.gender == MorphGender.FEMINIE): 
                     p.is_female = True
                     p._correctData()
                 elif (not p.is_male and t.morph.gender == MorphGender.MASCULINE): 
                     p.is_male = True
                     p._correctData()
             break
         # Reasons to reject the attribute and stop scanning.
         if (attr.morph.number == MorphNumber.PLURAL): 
             break
         if (attr.typ == PersonAttrTerminType.BESTREGARDS): 
             break
         if (attr.is_doubt): 
             # A doubtful attribute is kept only in these three contexts.
             if (has_prefix): 
                 pass
             elif (t.is_newline_before and attr.is_newline_after): 
                 pass
             elif (t.previous is not None and ((t.previous.is_hiphen or t.previous.isChar(':')))): 
                 pass
             else: 
                 break
         # The attribute's case must be compatible with the person's case.
         if (not morph_.case_.is_undefined and not attr.morph.case_.is_undefined): 
             if (((morph_.case_) & attr.morph.case_).is_undefined and not is_be): 
                 break
         if (open_br): 
             if (PersonAnalyzer._tryAttachPerson(t, ad, False, 0, True) is not None): 
                 break
         if (attrs1 is None): 
             # First attribute, right after a comma that follows the name:
             # rejected when a verb comes next and begin cannot start a
             # sentence.
             if (t.previous.is_comma and t.previous == end.next0_): 
                 ttt = attr.end_token.next0_
                 if (ttt is not None): 
                     if (ttt.morph.class0_.is_verb): 
                         if (MiscHelper.canBeStartOfSentence(begin)): 
                             pass
                         else: 
                             break
             attrs1 = list()
         attrs1.append(attr)
         if (attr.typ == PersonAttrTerminType.POSITION or attr.typ == PersonAttrTerminType.KING): 
             if (not is_be): 
                 has_position = True
         elif (attr.typ != PersonAttrTerminType.PREFIX): 
             # Any other type (except OTHER carrying an age) discards
             # everything collected so far.
             if (attr.typ == PersonAttrTerminType.OTHER and attr.age is not None): 
                 pass
             else: 
                 attrs1 = (None)
                 break
         t = attr.end_token
     # Filter 1: a position was found after the name while attributes also
     # exist before it. Unless one of the three "pass" conditions holds, a
     # leading POSITION attribute discards the trailing ones when the token
     # after the last trailing attribute is not a dot.
     if (attrs1 is not None and has_position and attrs is not None): 
         te1 = attrs[len(attrs) - 1].end_token.next0_
         te2 = attrs1[0].begin_token
         if (te1.whitespaces_after_count > te2.whitespaces_before_count and (te2.whitespaces_before_count < 2)): 
             pass
         elif (attrs1[0].age is not None): 
             pass
         elif (((te1.is_hiphen or te1.isChar(':'))) and not attrs1[0].is_newline_before and ((te2.previous.is_comma or te2.previous == end))): 
             pass
         else: 
             for a in attrs: 
                 if (a.typ == PersonAttrTerminType.POSITION): 
                     te = attrs1[len(attrs1) - 1].end_token
                     if (te.next0_ is not None): 
                         if (not te.next0_.isChar('.')): 
                             attrs1 = (None)
                             break
     # Filter 2: the last trailing attribute may in fact open the next
     # person; whitespace widths on both sides decide.
     if (attrs1 is not None and not has_prefix): 
         attr = attrs1[len(attrs1) - 1]
         ok = False
         if (attr.end_token.next0_ is not None and attr.end_token.next0_.chars.is_capital_upper): 
             ok = True
         else: 
             rt = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
             if (rt is not None and (isinstance(rt.referent, PersonReferent))): 
                 ok = True
         if (ok): 
             if (attr.begin_token.whitespaces_before_count > attr.end_token.whitespaces_after_count): 
                 attrs1 = (None)
             elif (attr.begin_token.whitespaces_before_count == attr.end_token.whitespaces_after_count): 
                 rt1 = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
                 if (rt1 is not None): 
                     attrs1 = (None)
     # Write the surviving trailing attributes into p and extend end.
     if (attrs1 is not None): 
         for a in attrs1: 
             if (a.typ != PersonAttrTerminType.PREFIX): 
                 if (a.age is not None): 
                     p.addSlot(PersonReferent.ATTR_AGE, a.age, True, 0)
                 elif (a.prop_ref is None): 
                     p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                 else: 
                     p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                 end = a.end_token
                 # The attribute's gender is used only while the person's
                 # gender is still unknown.
                 if (a.gender != MorphGender.UNDEFINED and not p.is_female and not p.is_male): 
                     if (a.gender == MorphGender.MASCULINE and not p.is_male): 
                         p.is_male = True
                         p._correctData()
                     elif (a.gender == MorphGender.FEMINIE and not p.is_female): 
                         p.is_female = True
                         p._correctData()
         # Include the bracket closing the "(" seen during the scan.
         if (open_br): 
             if (end.next0_ is not None and end.next0_.isChar(')')): 
                 end = end.next0_
     # --- Step 6: contacts, identity documents, organizations -------------
     # crlf_cou counts line starts passed since the last accepted item.
     crlf_cou = 0
     t = end.next0_
     first_pass3096 = True
     while True:
         if first_pass3096: first_pass3096 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char): 
             break
         if (t.is_newline_before): 
             ml = MailLine.parse(t, 0)
             if (ml is not None and ml.typ == MailLine.Types.FROM): 
                 break
             crlf_cou += 1
         if (t.isCharOf(":,(") or t.is_hiphen): 
             continue
         if (t.isChar('.') and t == end.next0_): 
             continue
         r = t.getReferent()
         if (r is not None): 
             if (r.type_name == "PHONE" or r.type_name == "URI" or r.type_name == "ADDRESS"): 
                 # URIs are accepted only for these four schemes.
                 ty = r.getStringValue("SCHEME")
                 if (r.type_name == "URI"): 
                     if ((ty != "mailto" and ty != "skype" and ty != "ICQ") and ty != "http"): 
                         break
                 p._addContact(r)
                 end = t
                 crlf_cou = 0
                 continue
         if (isinstance(r, PersonIdentityReferent)): 
             p.addSlot(PersonReferent.ATTR_IDDOC, r, False, 0)
             end = t
             crlf_cou = 0
             continue
         if (r is not None and r.type_name == "ORGANIZATION"): 
             if (t.next0_ is not None and t.next0_.morph.class0_.is_verb): 
                 break
             if (begin.previous is not None and begin.previous.morph.class0_.is_verb): 
                 break
             if (t.whitespaces_after_count == 1): 
                 break
             # Look for an existing property that already refers to this
             # organization.
             exist = False
             for s in p.slots: 
                 if (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonPropertyReferent))): 
                     pr = Utils.asObjectOrNull(s.value, PersonPropertyReferent)
                     if (pr.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None): 
                         exist = True
                         break
                 elif (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonAttrToken))): 
                     pr = Utils.asObjectOrNull(s.value, PersonAttrToken)
                     if (pr.referent.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None): 
                         exist = True
                         break
             # Otherwise attach it as a "сотрудник" property; end is not
             # moved by this branch.
             if (not exist): 
                 pat = PersonAttrToken(t, t)
                 pat.prop_ref = PersonPropertyReferent._new2291("сотрудник")
                 pat.prop_ref.addSlot(PersonPropertyReferent.ATTR_REF, r, False, 0)
                 p.addSlot(PersonReferent.ATTR_ATTR, pat, False, 0)
             continue
         if (r is not None): 
             break
         # Plain tokens are skipped only after a BESTREGARDS prefix, for
         # fewer than two line starts, and until another person begins.
         if (not has_prefix or crlf_cou >= 2): 
             break
         rt = t.kit.processReferent("PERSON", t)
         if (rt is not None): 
             break
     if (ad is not None): 
         ad.overflow_level -= 1
     return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
示例#4
0
 def create_nickname(pr : 'PersonReferent', t : 'Token') -> 'Token':
     """ Extract one or more nicknames starting at ``t``.

     Leading hyphens, commas, the characters ".:;", prepositions and an
     opening "(" are skipped. At least one keyword (ПРОЗВИЩЕ, КЛИЧКА,
     ПСЕВДОНИМ, ПСЕВДО, ПОЗЫВНОЙ) must be met before the nickname itself.
     Every nickname found is added to ``pr`` as an ``ATTR_NICKNAME`` slot.

     Args:
         pr(PersonReferent): person that receives the nickname slot(s)
         t(Token): start token

     Returns:
         Token: the last token consumed, or None if no nickname was found
     """
     keyword_seen = False
     in_parens = False
     while t is not None:
         if (t.is_hiphen or t.is_comma or t.is_char_of(".:;")):
             pass
         elif (t.morph.class0_.is_preposition):
             pass
         elif (t.is_char('(')):
             in_parens = True
         elif ((t.is_value("ПРОЗВИЩЕ", "ПРІЗВИСЬКО") or t.is_value("КЛИЧКА", None) or t.is_value("ПСЕВДОНИМ", "ПСЕВДОНІМ")) or t.is_value("ПСЕВДО", None) or t.is_value("ПОЗЫВНОЙ", "ПОЗИВНИЙ")):
             keyword_seen = True
         else:
             break
         t = t.next0_
     if (t is None or not keyword_seen):
         return None

     if (BracketHelper.is_bracket(t, True)):
         # Bracketed form: the text inside the bracket pair is the nickname.
         br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
         if (br is None):
             return None
         nick = MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
         if (nick is None):
             return None
         pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
         t = br.end_token
         # Further bracketed nicknames may follow, separated by comma/and.
         cur = t.next0_
         while cur is not None:
             if (not cur.is_comma_and):
                 if (not BracketHelper.is_bracket(cur, True)):
                     break
                 br = BracketHelper.try_parse(cur, BracketParseAttr.NO, 100)
                 if (br is None):
                     break
                 nick = MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
                 if (nick is not None):
                     pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                 cur = br.end_token
                 t = cur
             cur = cur.next0_
         if (in_parens and t.next0_ is not None and t.next0_.is_char(')')):
             t = t.next0_
         return t

     # Plain form: a run of nicknames separated by comma/and. Each one is
     # either one or two person items, or a single-token referent that is
     # not all lower-case.
     result = None
     while t is not None:
         if (not t.is_comma_and):
             if (result is not None and t.chars.is_all_lower):
                 break
             if (t.whitespaces_before_count > 2):
                 break
             nick = None
             items = PersonItemToken.try_attach_list(t, None, PersonItemToken.ParseAttr.NO, 10)
             if (items is not None and len(items) in (1, 2)):
                 nick = MiscHelper.get_text_value(items[0].begin_token, items[-1].end_token, GetTextAttr.NO)
             if (nick is not None):
                 pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                 t = items[-1].end_token
             elif ((isinstance(t, ReferentToken)) and not t.chars.is_all_lower and t.begin_token == t.end_token):
                 nick = MiscHelper.get_text_value_of_meta_token(Utils.asObjectOrNull(t, ReferentToken), GetTextAttr.NO)
                 pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
             else:
                 break
             # Shared tail of both successful branches.
             if (in_parens and t.next0_ is not None and t.next0_.is_char(')')):
                 t = t.next0_
             result = t
         t = t.next0_
     return result
示例#5
0
 def __tryParse(t: 'Token',
                is_in_lit: bool,
                max_char: int = 0) -> typing.List['ReferentToken']:
     if (t is None):
         return None
     is_bracket_regime = False
     if (t.previous is not None and t.previous.isChar('(')):
         is_bracket_regime = True
     blt = BookLinkToken.tryParse(t, 0)
     if (blt is None):
         blt = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
     if (blt is None and not is_bracket_regime):
         return None
     t0 = t
     coef = 0
     is_electr_res = False
     decree = None
     regtyp = BookLinkAnalyzer.RegionTyp.UNDEFINED
     num = None
     spec_see = None
     book_prev = None
     if (is_bracket_regime):
         regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
     elif (blt.typ == BookLinkTyp.PERSON):
         if (not is_in_lit):
             return None
         regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
     elif (blt.typ == BookLinkTyp.NUMBER):
         num = blt.value
         t = blt.end_token.next0_
         if (t is None or t.is_newline_before):
             return None
         if (not t.is_whitespace_before):
             if (isinstance(t, NumberToken)):
                 n = (t).value
                 if ((((n == "3" or n == "0")) and not t.is_whitespace_after
                      and (isinstance(t.next0_, TextToken)))
                         and t.next0_.chars.is_all_lower):
                     pass
                 else:
                     return None
             elif (not ((isinstance(t, TextToken)))
                   or t.chars.is_all_lower):
                 r = t.getReferent()
                 if (isinstance(r, PersonReferent)):
                     pass
                 elif (is_in_lit and r is not None
                       and r.type_name == "DECREE"):
                     pass
                 else:
                     return None
         first_pass2757 = True
         while True:
             if first_pass2757: first_pass2757 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (isinstance(t, NumberToken)):
                 break
             if (not ((isinstance(t, TextToken)))):
                 break
             if (BracketHelper.canBeStartOfSequence(t, True, False)):
                 break
             if (not t.chars.is_letter):
                 continue
             bbb = BookLinkToken.tryParse(t, 0)
             if (bbb is not None):
                 if (bbb.typ == BookLinkTyp.TAMZE):
                     spec_see = bbb
                     t = bbb.end_token.next0_
                     break
                 if (bbb.typ == BookLinkTyp.SEE):
                     t = bbb.end_token
                     continue
             break
         if (spec_see is not None and spec_see.typ == BookLinkTyp.TAMZE):
             coef += 1
             max0_ = 1000
             tt = t0
             while tt is not None and max0_ > 0:
                 if (isinstance(tt.getReferent(), BookLinkRefReferent)):
                     book_prev = (tt.getReferent()).book
                     break
                 tt = tt.previous
                 max0_ -= 1
         blt1 = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
         if (blt1 is not None and blt1.typ == BookLinkTyp.PERSON):
             regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
         else:
             ok = False
             tt = t
             first_pass2758 = True
             while True:
                 if first_pass2758: first_pass2758 = False
                 else: tt = (None if tt is None else tt.next0_)
                 if (not (tt is not None)): break
                 if (tt.is_newline_before):
                     break
                 if (is_in_lit and tt.getReferent() is not None
                         and tt.getReferent().type_name == "DECREE"):
                     ok = True
                     decree = tt
                     break
                 bbb = BookLinkToken.tryParse(tt, 0)
                 if (bbb is None):
                     continue
                 if (bbb.typ == BookLinkTyp.ELECTRONRES):
                     is_electr_res = True
                     ok = True
                     break
                 if (bbb.typ == BookLinkTyp.DELIMETER):
                     tt = bbb.end_token.next0_
                     if (BookLinkToken.tryParseAuthor(
                             tt, FioTemplateType.UNDEFINED) is not None):
                         ok = True
                         break
                     bbb = BookLinkToken.tryParse(tt, 0)
                     if (bbb is not None):
                         if (bbb.typ == BookLinkTyp.EDITORS
                                 or bbb.typ == BookLinkTyp.TRANSLATE
                                 or bbb.typ == BookLinkTyp.SOSTAVITEL):
                             ok = True
                             break
             if (not ok and not is_in_lit):
                 if (BookLinkToken.checkLinkBefore(t0, num)):
                     pass
                 else:
                     return None
             regtyp = BookLinkAnalyzer.RegionTyp.NAME
     else:
         return None
     res = BookLinkReferent()
     corr_authors = list()
     t00 = t
     blt00 = None
     start_of_name = None
     prev_pers_templ = FioTemplateType.UNDEFINED
     if (regtyp == BookLinkAnalyzer.RegionTyp.AUTHORS):
         first_pass2759 = True
         while True:
             if first_pass2759: first_pass2759 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (max_char > 0 and t.begin_char >= max_char):
                 break
             if (t.isCharOf(".;") or t.is_comma_and):
                 continue
             if (t.isChar('/')):
                 break
             if ((t.isChar('(') and t.next0_ is not None
                  and t.next0_.isValue("EDS", None))
                     and t.next0_.next0_ is not None
                     and t.next0_.next0_.isChar(')')):
                 t = t.next0_.next0_.next0_
                 break
             blt = BookLinkToken.tryParseAuthor(t, prev_pers_templ)
             if (blt is None and t.previous is not None
                     and t.previous.is_and):
                 blt = BookLinkToken.tryParseAuthor(
                     t.previous, FioTemplateType.UNDEFINED)
             if (blt is None):
                 if ((isinstance(t.getReferent(), OrganizationReferent))
                         and blt00 is not None):
                     bbb2 = BookLinkToken.tryParse(t.next0_, 0)
                     if (bbb2 is not None):
                         if (bbb2.typ == BookLinkTyp.YEAR):
                             res.addSlot(BookLinkReferent.ATTR_AUTHOR,
                                         t.getReferent(), False, 0)
                             res.year = int(bbb2.value)
                             coef += .5
                             t = bbb2.end_token.next0_
                 break
             if (blt.typ == BookLinkTyp.PERSON):
                 tt2 = blt.end_token.next0_
                 bbb2 = BookLinkToken.tryParse(tt2, 0)
                 if (bbb2 is not None):
                     if (bbb2.typ == BookLinkTyp.YEAR):
                         res.year = int(bbb2.value)
                         coef += .5
                         blt.end_token = bbb2.end_token
                         blt00 = (None)
                 if (blt00 is not None
                         and ((blt00.end_token.next0_ == blt.begin_token
                               or blt.begin_token.previous.isChar('.')))):
                     tt11 = blt.end_token.next0_
                     nex = BookLinkToken.tryParse(tt11, 0)
                     if (nex is not None
                             and nex.typ == BookLinkTyp.ANDOTHERS):
                         pass
                     else:
                         if (tt11 is None):
                             break
                         if (tt11.isChar('/') and tt11.next0_ is not None
                                 and tt11.next0_.isChar('/')):
                             break
                         if (tt11.isChar(':')):
                             break
                         if ((str(blt).find('.') < 0)
                                 and str(blt00).find('.') > 0):
                             break
                         if ((isinstance(tt11, TextToken))
                                 and tt11.chars.is_all_lower):
                             break
                         if (tt11.isCharOf(",.;")
                                 and tt11.next0_ is not None):
                             tt11 = tt11.next0_
                         nex = BookLinkToken.tryParse(tt11, 0)
                         if (nex is not None
                                 and nex.typ != BookLinkTyp.PERSON
                                 and nex.typ != BookLinkTyp.ANDOTHERS):
                             break
                 elif (
                     (blt00 is not None
                      and blt00.person_template != FioTemplateType.UNDEFINED
                      and blt.person_template != blt00.person_template)
                         and blt.person_template
                         == FioTemplateType.NAMESURNAME):
                     if (blt.end_token.next0_ is None
                             or not blt.end_token.next0_.is_comma_and):
                         break
                     if (BookLinkToken.tryParseAuthor(
                             blt.end_token.next0_.next0_,
                             FioTemplateType.UNDEFINED) is not None):
                         pass
                     else:
                         break
                 if (blt00 is None and blt.person_template
                         == FioTemplateType.NAMESURNAME):
                     tt = blt.end_token.next0_
                     if (tt is not None and tt.is_hiphen):
                         tt = tt.next0_
                     if (isinstance(tt, NumberToken)):
                         break
                 BookLinkAnalyzer.__addAuthor(res, blt)
                 coef += 1
                 t = blt.end_token
                 if (isinstance(t.getReferent(), PersonReferent)):
                     corr_authors.append(
                         Utils.asObjectOrNull(t, ReferentToken))
                 blt00 = blt
                 prev_pers_templ = blt.person_template
                 start_of_name = blt.start_of_name
                 if ((start_of_name) is not None):
                     t = t.next0_
                     break
                 continue
             if (blt.typ == BookLinkTyp.ANDOTHERS):
                 coef += .5
                 t = blt.end_token.next0_
                 res.authors_and_other = True
                 break
             break
     if (t is None):
         return None
     if ((t.is_newline_before and t != t0 and num is None) and res.findSlot(
             BookLinkReferent.ATTR_AUTHOR, None, True) is None):
         return None
     if (start_of_name is None):
         if (t.chars.is_all_lower):
             coef -= (1)
         if (t.chars.is_latin_letter and not is_electr_res and num is None):
             if (res.getSlotValue(BookLinkReferent.ATTR_AUTHOR) is None):
                 return None
     tn0 = t
     tn1 = None
     uri = None
     next_num = None
     wrapnn393 = RefOutArgWrapper(0)
     inoutres394 = Utils.tryParseInt(Utils.ifNotNull(num, ""), wrapnn393)
     nn = wrapnn393.value
     if (inoutres394):
         next_num = str((nn + 1))
     br = (BracketHelper.tryParse(
         t,
         Utils.valToEnum(
             (BracketParseAttr.CANCONTAINSVERBS) |
             (BracketParseAttr.CANBEMANYLINES), BracketParseAttr), 100)
           if BracketHelper.canBeStartOfSequence(t, True, False) else None)
     if (br is not None):
         t = t.next0_
     pages = None
     first_pass2760 = True
     while True:
         if first_pass2760: first_pass2760 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char > 0 and t.begin_char >= max_char):
             break
         if (br is not None and br.end_token == t):
             tn1 = t
             break
         tit = TitleItemToken.tryAttach(t)
         if (tit is not None):
             if ((tit.typ == TitleItemToken.Types.TYP and tn0 == t
                  and br is None) and BracketHelper.canBeStartOfSequence(
                      tit.end_token.next0_, True, False)):
                 br = BracketHelper.tryParse(tit.end_token.next0_,
                                             BracketParseAttr.NO, 100)
                 if (br is not None):
                     coef += (1)
                     if (num is not None):
                         coef += 1
                     tn0 = br.begin_token
                     tn1 = br.end_token
                     res.typ = tit.value.lower()
                     t = br.end_token.next0_
                     break
         if (t.is_newline_before and t != tn0):
             if (br is not None and (t.end_char < br.end_char)):
                 pass
             elif (not MiscHelper.canBeStartOfSentence(t)):
                 pass
             else:
                 if (t.newlines_before_count > 1):
                     break
                 if ((isinstance(t, NumberToken)) and num is not None
                         and (t).int_value is not None):
                     if (num == str(((t).int_value - 1))):
                         break
                 elif (num is not None):
                     pass
                 else:
                     nnn = NounPhraseHelper.tryParse(
                         t.previous,
                         Utils.valToEnum(
                             ((NounPhraseParseAttr.PARSEPREPOSITION) |
                              (NounPhraseParseAttr.PARSEADVERBS) |
                              (NounPhraseParseAttr.PARSENUMERICASADJECTIVE))
                             | (NounPhraseParseAttr.MULTILINES),
                             NounPhraseParseAttr), 0)
                     if (nnn is not None and nnn.end_char >= t.end_char):
                         pass
                     else:
                         break
         if (t.isCharOf(".;") and t.whitespaces_after_count > 0):
             tit = TitleItemToken.tryAttach(t.next0_)
             if ((tit) is not None):
                 if (tit.typ == TitleItemToken.Types.TYP):
                     break
             stop = True
             words = 0
             notwords = 0
             tt = t.next0_
             first_pass2761 = True
             while True:
                 if first_pass2761: first_pass2761 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 blt0 = BookLinkToken.tryParse(tt, 0)
                 if (blt0 is None):
                     if (tt.is_newline_before):
                         break
                     if ((isinstance(tt, TextToken)) and
                             not tt.getMorphClassInDictionary().is_undefined
                         ):
                         words += 1
                     else:
                         notwords += 1
                     if (words > 6 and words > (notwords * 4)):
                         stop = False
                         break
                     continue
                 if ((blt0.typ == BookLinkTyp.DELIMETER
                      or blt0.typ == BookLinkTyp.TRANSLATE
                      or blt0.typ == BookLinkTyp.TYPE)
                         or blt0.typ == BookLinkTyp.GEO
                         or blt0.typ == BookLinkTyp.PRESS):
                     stop = False
                 break
             if (br is not None
                     and br.end_token.previous.end_char > t.end_char):
                 stop = False
             if (stop):
                 break
         if (t == decree):
             t = t.next0_
             break
         blt = BookLinkToken.tryParse(t, 0)
         if (blt is None):
             tn1 = t
             continue
         if (blt.typ == BookLinkTyp.DELIMETER):
             break
         if (((blt.typ == BookLinkTyp.MISC or blt.typ
               == BookLinkTyp.TRANSLATE or blt.typ == BookLinkTyp.NAMETAIL)
              or blt.typ == BookLinkTyp.TYPE
              or blt.typ == BookLinkTyp.VOLUME)
                 or blt.typ == BookLinkTyp.PAGERANGE
                 or blt.typ == BookLinkTyp.PAGES):
             coef += 1
             break
         if (blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS):
             if (t.previous.is_hiphen or t.previous.isCharOf(".;")
                     or blt.add_coef > 0):
                 break
         if (blt.typ == BookLinkTyp.YEAR):
             if (t.previous is not None and t.previous.is_comma):
                 break
         if (blt.typ == BookLinkTyp.ELECTRONRES):
             is_electr_res = True
             break
         if (blt.typ == BookLinkTyp.URL):
             if (t == tn0 or t.previous.isCharOf(":.")):
                 is_electr_res = True
                 break
         tn1 = t
     if (tn1 is None and start_of_name is None):
         if (is_electr_res):
             uri_re = BookLinkReferent()
             rt0 = ReferentToken(uri_re, t00, t)
             rts0 = list()
             bref0 = BookLinkRefReferent._new389(uri_re)
             if (num is not None):
                 bref0.number = num
             rt01 = ReferentToken(bref0, t0, rt0.end_token)
             ok = False
             while t is not None:
                 if (t.is_newline_before):
                     break
                 blt0 = BookLinkToken.tryParse(t, 0)
                 if (blt0 is not None):
                     if (isinstance(blt0.ref, UriReferent)):
                         uri_re.addSlot(
                             BookLinkReferent.ATTR_URL,
                             Utils.asObjectOrNull(blt0.ref, UriReferent),
                             False, 0)
                         ok = True
                     t = blt0.end_token
                 rt0.end_token = rt01.end_token = t
                 t = t.next0_
             if (ok):
                 rts0.append(rt01)
                 rts0.append(rt0)
                 return rts0
         if (decree is not None and num is not None):
             rts0 = list()
             bref0 = BookLinkRefReferent._new389(decree.getReferent())
             if (num is not None):
                 bref0.number = num
             rt01 = ReferentToken(bref0, t0, decree)
             t = decree.next0_
             while t is not None:
                 if (t.is_newline_before):
                     break
                 if (isinstance(t, TextToken)):
                     if ((t).is_pure_verb):
                         return None
                 rt01.end_token = t
                 t = t.next0_
             rts0.append(rt01)
             return rts0
         if (book_prev is not None):
             tt = t
             while tt is not None and ((tt.isCharOf(",.") or tt.is_hiphen)):
                 tt = tt.next0_
             blt0 = BookLinkToken.tryParse(tt, 0)
             if (blt0 is not None and blt0.typ == BookLinkTyp.PAGERANGE):
                 rts0 = list()
                 bref0 = BookLinkRefReferent._new389(book_prev)
                 if (num is not None):
                     bref0.number = num
                 bref0.pages = blt0.value
                 rt00 = ReferentToken(bref0, t0, blt0.end_token)
                 rts0.append(rt00)
                 return rts0
         return None
     if (br is not None
             and ((tn1 == br.end_token or tn1 == br.end_token.previous))):
         tn0 = tn0.next0_
         tn1 = tn1.previous
     if (start_of_name is None):
         while tn0 is not None:
             if (tn0.isCharOf(":,~")):
                 tn0 = tn0.next0_
             else:
                 break
     while tn1 is not None and tn1.begin_char > tn0.begin_char:
         if (tn1.isCharOf(".;,:(~") or tn1.is_hiphen
                 or tn1.isValue("РЕД", None)):
             pass
         else:
             break
         tn1 = tn1.previous
     nam = MiscHelper.getTextValue(
         tn0, tn1,
         Utils.valToEnum(
             (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER),
             GetTextAttr))
     if (start_of_name is not None):
         if (nam is None or (len(nam) < 3)):
             nam = start_of_name
         else:
             nam = "{0}{1}{2}".format(
                 start_of_name, (" " if tn0.is_whitespace_before else ""),
                 nam)
     if (nam is None):
         return None
     res.name = nam
     if (num is None and not is_in_lit):
         if (len(nam) < 20):
             return None
         coef -= (2)
     if (len(nam) > 500):
         coef -= (math.floor(len(nam) / 500))
     if (is_bracket_regime):
         coef -= 1
     if (len(nam) > 200):
         if (num is None):
             return None
         if (res.findSlot(BookLinkReferent.ATTR_AUTHOR, None, True) is None
                 and not BookLinkToken.checkLinkBefore(t0, num)):
             return None
     en = 0
     ru = 0
     ua = 0
     cha = 0
     nocha = 0
     chalen = 0
     lt0 = tn0
     lt1 = tn1
     if (tn1 is None):
         if (t is None):
             return None
         lt0 = t0
         lt1 = t
         tn1 = t.previous
     tt = lt0
     while tt is not None and tt.end_char <= lt1.end_char:
         if ((isinstance(tt, TextToken)) and tt.chars.is_letter):
             if (tt.chars.is_latin_letter):
                 en += 1
             elif (tt.morph.language.is_ua):
                 ua += 1
             elif (tt.morph.language.is_ru):
                 ru += 1
             if (tt.length_char > 2):
                 cha += 1
                 chalen += tt.length_char
         elif (not ((isinstance(tt, ReferentToken)))):
             nocha += 1
         tt = tt.next0_
     if (ru > (ua + en)):
         res.lang = "RU"
     elif (ua > (ru + en)):
         res.lang = "UA"
     elif (en > (ru + ua)):
         res.lang = "EN"
     if (nocha > 3 and nocha > cha and start_of_name is None):
         if (nocha > (math.floor(chalen / 3))):
             coef -= (2)
     if (res.lang == "EN"):
         tt = tn0.next0_
         first_pass2762 = True
         while True:
             if first_pass2762: first_pass2762 = False
             else: tt = tt.next0_
             if (not (tt is not None and (tt.end_char < tn1.end_char))):
                 break
             if (tt.is_comma and tt.next0_ is not None
                     and ((not tt.next0_.chars.is_all_lower or
                           (isinstance(tt.next0_, ReferentToken))))):
                 if (tt.next0_.next0_ is not None
                         and tt.next0_.next0_.is_comma_and):
                     if (isinstance(tt.next0_, ReferentToken)):
                         pass
                     else:
                         continue
                 nam = MiscHelper.getTextValue(
                     tn0, tt.previous,
                     Utils.valToEnum((GetTextAttr.KEEPQUOTES) |
                                     (GetTextAttr.KEEPREGISTER),
                                     GetTextAttr))
                 if (nam is not None and len(nam) > 15):
                     res.name = nam
                     break
     rt = ReferentToken(res, t00, tn1)
     authors = True
     edits = False
     br = (None)
     first_pass2763 = True
     while True:
         if first_pass2763: first_pass2763 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char > 0 and t.begin_char >= max_char):
             break
         if (BracketHelper.canBeStartOfSequence(t, False, False)):
             br = BracketHelper.tryParse(t, BracketParseAttr.CANBEMANYLINES,
                                         100)
             if (br is not None and br.length_char > 300):
                 br = (None)
         blt = BookLinkToken.tryParse(t, 0)
         if (t.is_newline_before and not t.isChar('/')
                 and not t.previous.isChar('/')):
             if (blt is not None and blt.typ == BookLinkTyp.NUMBER):
                 break
             if (t.previous.isCharOf(":")):
                 pass
             elif (blt is not None and ((
                 ((blt.typ == BookLinkTyp.DELIMETER or blt.typ
                   == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES)
                  or blt.typ == BookLinkTyp.GEO or blt.typ
                  == BookLinkTyp.PRESS) or blt.typ == BookLinkTyp.N))):
                 pass
             elif (num is not None and BookLinkToken.tryParseAuthor(
                     t, FioTemplateType.UNDEFINED) is not None):
                 pass
             elif (num is not None and blt is not None
                   and blt.typ != BookLinkTyp.NUMBER):
                 pass
             elif (br is not None and (t.end_char < br.end_char)
                   and t.begin_char > br.begin_char):
                 pass
             else:
                 ok = False
                 mmm = 50
                 tt = t.next0_
                 while tt is not None and mmm > 0:
                     if (tt.is_newline_before):
                         blt2 = BookLinkToken.tryParse(tt, 0)
                         if (blt2 is not None
                                 and blt2.typ == BookLinkTyp.NUMBER
                                 and blt2.value == next_num):
                             ok = True
                             break
                         if (blt2 is not None):
                             if (blt2.typ == BookLinkTyp.PAGES
                                     or blt2.typ == BookLinkTyp.GEO
                                     or blt2.typ == BookLinkTyp.PRESS):
                                 ok = True
                                 break
                     tt = tt.next0_
                     mmm -= 1
                 if (not ok):
                     npt = NounPhraseHelper.tryParse(
                         t.previous,
                         Utils.valToEnum(
                             ((NounPhraseParseAttr.MULTILINES) |
                              (NounPhraseParseAttr.PARSEADVERBS) |
                              (NounPhraseParseAttr.PARSEPREPOSITION)) |
                             (NounPhraseParseAttr.PARSEVERBS) |
                             (NounPhraseParseAttr.PARSEPRONOUNS),
                             NounPhraseParseAttr), 0)
                     if (npt is not None and npt.end_char >= t.end_char):
                         ok = True
                 if (not ok):
                     break
         rt.end_token = t
         if (blt is not None):
             rt.end_token = blt.end_token
         if (t.isCharOf(".,") or t.is_hiphen):
             continue
         if (t.isValue("С", None)):
             pass
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None
                 and blt.typ == BookLinkTyp.EDITORS):
             edits = True
             t = blt.end_token
             coef += 1
             continue
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None
                 and blt.typ == BookLinkTyp.SOSTAVITEL):
             edits = False
             t = blt.end_token
             coef += 1
             continue
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and authors):
             blt2 = BookLinkToken.tryParseAuthor(t, prev_pers_templ)
             if (blt2 is not None and blt2.typ == BookLinkTyp.PERSON):
                 prev_pers_templ = blt2.person_template
                 if (not edits):
                     BookLinkAnalyzer.__addAuthor(res, blt2)
                 coef += 1
                 t = blt2.end_token
                 continue
             if (blt2 is not None and blt2.typ == BookLinkTyp.ANDOTHERS):
                 if (not edits):
                     res.authors_and_other = True
                 coef += 1
                 t = blt2.end_token
                 continue
             authors = False
         if (blt is None):
             continue
         if (blt.typ == BookLinkTyp.ELECTRONRES
                 or blt.typ == BookLinkTyp.URL):
             is_electr_res = True
             if (blt.typ == BookLinkTyp.ELECTRONRES):
                 coef += 1.5
             else:
                 coef += .5
             if (isinstance(blt.ref, UriReferent)):
                 res.addSlot(BookLinkReferent.ATTR_URL,
                             Utils.asObjectOrNull(blt.ref, UriReferent),
                             False, 0)
         elif (blt.typ == BookLinkTyp.YEAR):
             if (res.year == 0):
                 res.year = int(blt.value)
                 coef += .5
         elif (blt.typ == BookLinkTyp.DELIMETER):
             coef += 1
             if (blt.length_char == 2):
                 regtyp = BookLinkAnalyzer.RegionTyp.SECOND
             else:
                 regtyp = BookLinkAnalyzer.RegionTyp.FIRST
         elif (
             (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TYPE
                or blt.typ == BookLinkTyp.PAGES) or blt.typ
               == BookLinkTyp.NAMETAIL or blt.typ == BookLinkTyp.TRANSLATE)
              or blt.typ == BookLinkTyp.PRESS
              or blt.typ == BookLinkTyp.VOLUME)
                 or blt.typ == BookLinkTyp.N):
             coef += 1
         elif (blt.typ == BookLinkTyp.PAGERANGE):
             pages = blt
             coef += 1
             if (is_bracket_regime and blt.end_token.next0_ is not None
                     and blt.end_token.next0_.isChar(')')):
                 coef += (2)
                 if (res.name is not None
                         and res.findSlot(BookLinkReferent.ATTR_AUTHOR,
                                          None, True) is not None):
                     coef = (10)
         elif (blt.typ == BookLinkTyp.GEO
               and ((regtyp == BookLinkAnalyzer.RegionTyp.SECOND
                     or regtyp == BookLinkAnalyzer.RegionTyp.FIRST))):
             coef += 1
         elif (blt.typ == BookLinkTyp.GEO and t.previous is not None
               and t.previous.isChar('.')):
             coef += 1
         elif (blt.typ == BookLinkTyp.ANDOTHERS):
             coef += 1
             if (authors):
                 res.authors_and_other = True
         coef += blt.add_coef
         t = blt.end_token
     if ((coef < 2.5) and num is not None):
         if (BookLinkToken.checkLinkBefore(t0, num)):
             coef += (2)
         elif (BookLinkToken.checkLinkAfter(rt.end_token, num)):
             coef += (1)
     if (rt.length_char > 500):
         return None
     if (is_in_lit):
         coef += 1
     if (coef < 2.5):
         if (is_electr_res and uri is not None):
             pass
         elif (coef >= 2 and is_in_lit):
             pass
         else:
             return None
     for rr in corr_authors:
         pits0 = PersonItemToken.tryAttachList(
             rr.begin_token, None,
             PersonItemToken.ParseAttr.CANINITIALBEDIGIT, 10)
         if (pits0 is None or (len(pits0) < 2)):
             continue
         if (pits0[0].typ == PersonItemToken.ItemType.VALUE):
             exi = False
             for i in range(len(rr.referent.slots) - 1, -1, -1):
                 s = rr.referent.slots[i]
                 if (s.type_name == PersonReferent.ATTR_LASTNAME):
                     ln = Utils.asObjectOrNull(s.value, str)
                     if (ln is None):
                         continue
                     if (ln == pits0[0].value):
                         exi = True
                         continue
                     if (ln.find('-') > 0):
                         ln = ln[0:0 + ln.find('-')]
                     if (pits0[0].begin_token.isValue(ln, None)):
                         del rr.referent.slots[i]
             if (not exi):
                 rr.referent.addSlot(PersonReferent.ATTR_LASTNAME,
                                     pits0[0].value, False, 0)
     rts = list()
     bref = BookLinkRefReferent._new389(res)
     if (num is not None):
         bref.number = num
     rt1 = ReferentToken(bref, t0, rt.end_token)
     if (pages is not None):
         if (pages.value is not None):
             bref.pages = pages.value
         rt.end_token = pages.begin_token.previous
     rts.append(rt1)
     rts.append(rt)
     return rts
示例#6
0
 def parse(t0: 'Token', lev_: int) -> 'MailLine':
     if (t0 is None):
         return None
     res = MailLine(t0, t0)
     pr = True
     t = t0
     first_pass3027 = True
     while True:
         if first_pass3027: first_pass3027 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_newline_before and t0 != t):
             break
         res.end_token = t
         if (t.is_table_control_char or t.is_hiphen):
             continue
         if (pr):
             if ((isinstance(t, TextToken)) and t.isCharOf(">|")):
                 res.lev += 1
             else:
                 pr = False
                 tok = MailLine.M_FROM_WORDS.tryParse(t, TerminParseAttr.NO)
                 if (tok is not None and tok.end_token.next0_ is not None
                         and tok.end_token.next0_.isChar(':')):
                     res.typ = MailLine.Types.FROM
                     t = tok.end_token.next0_
                     continue
         if (isinstance(t, ReferentToken)):
             r = t.getReferent()
             if (r is not None):
                 if ((((isinstance(r, PersonReferent)) or
                       (isinstance(r, GeoReferent)) or
                       (isinstance(r, AddressReferent)))
                      or r.type_name == "PHONE" or r.type_name == "URI")
                         or (isinstance(r, PersonPropertyReferent))
                         or r.type_name == "ORGANIZATION"):
                     res.refs.append(r)
     if (res.typ == MailLine.Types.UNDEFINED):
         t = t0
         while t is not None and (t.end_char < res.end_char):
             if (not t.is_hiphen and t.chars.is_letter):
                 break
             t = t.next0_
         ok = 0
         nams = 0
         oth = 0
         last_comma = None
         first_pass3028 = True
         while True:
             if first_pass3028: first_pass3028 = False
             else: t = t.next0_
             if (not (t is not None and (t.end_char < res.end_char))): break
             if (isinstance(t.getReferent(), PersonReferent)):
                 nams += 1
                 continue
             if (isinstance(t, TextToken)):
                 if (not t.chars.is_letter):
                     last_comma = t
                     continue
                 tok = MailLine.M_HELLO_WORDS.tryParse(
                     t, TerminParseAttr.NO)
                 if (tok is not None):
                     ok += 1
                     t = tok.end_token
                     continue
                 if (t.isValue("ВСЕ", None) or t.isValue("ALL", None)
                         or t.isValue("TEAM", None)):
                     nams += 1
                     continue
                 pit = PersonItemToken.tryAttach(
                     t, None, PersonItemToken.ParseAttr.NO, None)
                 if (pit is not None):
                     nams += 1
                     t = pit.end_token
                     continue
             oth += 1
             if ((oth) > 3):
                 if (ok > 0 and last_comma is not None):
                     res.end_token = last_comma
                     oth = 0
                 break
         if ((oth < 3) and ok > 0):
             res.typ = MailLine.Types.HELLO
     if (res.typ == MailLine.Types.UNDEFINED):
         ok_words = 0
         if (t0.isValue("HAVE", None)):
             pass
         t = t0
         first_pass3029 = True
         while True:
             if first_pass3029: first_pass3029 = False
             else: t = t.next0_
             if (not (t is not None and t.end_char <= res.end_char)): break
             if (not ((isinstance(t, TextToken)))):
                 continue
             if (t.isChar('<')):
                 br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
                 if (br is not None):
                     t = br.end_token
                     continue
             if (not t.is_letters or t.is_table_control_char):
                 continue
             tok = MailLine.M_REGARD_WORDS.tryParse(t, TerminParseAttr.NO)
             if (tok is not None):
                 ok_words += 1
                 while t is not None and t.end_char <= tok.end_char:
                     t.tag = (tok.termin)
                     t = t.next0_
                 t = tok.end_token
                 if ((isinstance(t.next0_, TextToken))
                         and t.next0_.morph.case_.is_genitive):
                     t = t.next0_
                     first_pass3030 = True
                     while True:
                         if first_pass3030: first_pass3030 = False
                         else: t = t.next0_
                         if (not (t.end_char <= res.end_char)): break
                         if (t.morph.class0_.is_conjunction):
                             continue
                         npt1 = NounPhraseHelper.tryParse(
                             t, NounPhraseParseAttr.NO, 0)
                         if (npt1 is None):
                             break
                         if (not npt1.morph.case_.is_genitive):
                             break
                         while t.end_char < npt1.end_char:
                             t.tag = (t)
                             t = t.next0_
                         t.tag = (t)
                 continue
             if ((t.morph.class0_.is_preposition or
                  t.morph.class0_.is_conjunction or t.morph.class0_.is_misc)
                     or t.isValue("C", None)):
                 continue
             if ((ok_words > 0 and t.previous is not None
                  and t.previous.is_comma)
                     and t.previous.begin_char > t0.begin_char
                     and not t.chars.is_all_lower):
                 res.end_token = t.previous
                 break
             npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
             if (npt is None):
                 if ((res.end_char - t.end_char) > 10):
                     ok_words = 0
                 break
             tok = MailLine.M_REGARD_WORDS.tryParse(npt.end_token,
                                                    TerminParseAttr.NO)
             if (tok is not None
                     and (isinstance(npt.end_token, TextToken))):
                 term = (npt.end_token).term
                 if (term == "ДЕЛ"):
                     tok = (None)
             if (tok is None):
                 if (npt.noun.isValue("НАДЕЖДА", None)):
                     t.tag = (t)
                 elif (ok_words > 0 and t.isValue("NICE", None)
                       and ((res.end_char - npt.end_char) < 13)):
                     t.tag = (t)
                 else:
                     ok_words = 0
                 break
             ok_words += 1
             while t is not None and t.end_char <= tok.end_char:
                 t.tag = (tok.termin)
                 t = t.next0_
             t = tok.end_token
         if (ok_words > 0):
             res.typ = MailLine.Types.BESTREGARDS
     if (res.typ == MailLine.Types.UNDEFINED):
         t = t0
         while t is not None and (t.end_char < res.end_char):
             if (not ((isinstance(t, TextToken)))):
                 break
             elif (not t.is_hiphen and t.chars.is_letter):
                 break
             t = t.next0_
         if (t is not None):
             if (t != t0):
                 pass
             if (((t.isValue("ПЕРЕСЫЛАЕМОЕ", None)
                   or t.isValue("ПЕРЕАДРЕСОВАННОЕ", None)))
                     and t.next0_ is not None
                     and t.next0_.isValue("СООБЩЕНИЕ", None)):
                 res.typ = MailLine.Types.FROM
                 res.must_be_first_line = True
             elif ((t.isValue("НАЧАЛО", None) and t.next0_ is not None and
                    ((t.next0_.isValue("ПЕРЕСЫЛАЕМОЕ", None)
                      or t.next0_.isValue("ПЕРЕАДРЕСОВАННОЕ", None))))
                   and t.next0_.next0_ is not None
                   and t.next0_.next0_.isValue("СООБЩЕНИЕ", None)):
                 res.typ = MailLine.Types.FROM
                 res.must_be_first_line = True
             elif (t.isValue("ORIGINAL", None) and t.next0_ is not None
                   and ((t.next0_.isValue("MESSAGE", None)
                         or t.next0_.isValue("APPOINTMENT", None)))):
                 res.typ = MailLine.Types.FROM
                 res.must_be_first_line = True
             elif (t.isValue("ПЕРЕСЛАНО", None) and t.next0_ is not None
                   and t.next0_.isValue("ПОЛЬЗОВАТЕЛЕМ", None)):
                 res.typ = MailLine.Types.FROM
                 res.must_be_first_line = True
             elif (((t.getReferent() is not None
                     and t.getReferent().type_name == "DATE"))
                   or ((t.isValue("IL", None) and t.next0_ is not None
                        and t.next0_.isValue("GIORNO", None)))
                   or ((t.isValue("ON", None) and
                        (isinstance(t.next0_, ReferentToken))
                        and t.next0_.getReferent().type_name == "DATE"))):
                 has_from = False
                 has_date = t.getReferent() is not None and t.getReferent(
                 ).type_name == "DATE"
                 if (t.is_newline_after and (lev_ < 5)):
                     res1 = MailLine.parse(t.next0_, lev_ + 1)
                     if (res1 is not None
                             and res1.typ == MailLine.Types.HELLO):
                         res.typ = MailLine.Types.FROM
                 next0__ = MailLine.parse(res.end_token.next0_, lev_ + 1)
                 if (next0__ is not None):
                     if (next0__.typ != MailLine.Types.UNDEFINED):
                         next0__ = (None)
                 tmax = res.end_char
                 if (next0__ is not None):
                     tmax = next0__.end_char
                 br1 = None
                 while t is not None and t.end_char <= tmax:
                     if (t.isValue("ОТ", None) or t.isValue("FROM", None)):
                         has_from = True
                     elif (
                             t.getReferent() is not None and
                         ((t.getReferent().type_name == "URI" or
                           (isinstance(t.getReferent(), PersonReferent))))):
                         if (t.getReferent().type_name == "URI"
                                 and has_date):
                             if (br1 is not None):
                                 has_from = True
                                 next0__ = (None)
                             if (t.previous.isChar('<')
                                     and t.next0_ is not None
                                     and t.next0_.isChar('>')):
                                 t = t.next0_
                                 if (t.next0_ is not None
                                         and t.next0_.isChar(':')):
                                     t = t.next0_
                                 if (t.is_newline_after):
                                     has_from = True
                                     next0__ = (None)
                         t = t.next0_
                         while t is not None and t.end_char <= res.end_char:
                             if (t.isValue("HA", None)
                                     and t.next0_ is not None
                                     and t.next0_.isValue("SCRITTO", None)):
                                 has_from = True
                                 break
                             elif (((t.isValue("НАПИСАТЬ", None)
                                     or t.isValue("WROTE", None)))
                                   and ((res.end_char - t.end_char) < 10)):
                                 has_from = True
                                 break
                             t = t.next0_
                         if (has_from):
                             res.typ = MailLine.Types.FROM
                             if (next0__ is not None
                                     and t.end_char >= next0__.begin_char):
                                 res.end_token = next0__.end_token
                         break
                     elif (br1 is None and not t.isChar('<')
                           and BracketHelper.canBeStartOfSequence(
                               t, True, False)):
                         br1 = BracketHelper.tryParse(
                             t, BracketParseAttr.NO, 100)
                         if (br1 is not None):
                             t = br1.end_token
                     t = t.next0_
             else:
                 has_uri = False
                 while t is not None and (t.end_char < res.end_char):
                     if (t.getReferent() is not None and
                         ((t.getReferent().type_name == "URI" or
                           (isinstance(t.getReferent(), PersonReferent))))):
                         has_uri = True
                     elif (t.isValue("ПИСАТЬ", None) and has_uri):
                         if (t.next0_ is not None and t.next0_.isChar('(')):
                             if (has_uri):
                                 res.typ = MailLine.Types.FROM
                             break
                     t = t.next0_
     return res