Exemplo n.º 1
0
 def initialize(lang: 'MorphLang' = None) -> None:
     """Initialize the processor service.

     Every analyzer has to be initialized separately. When Sdk.Initialize()
     is called instead, it performs the initialization of this service and
     of all analyzers itself.

     Repeated calls are no-ops: the first call marks the service as
     initialized and later calls return immediately.

     Args:
         lang(MorphLang): languages to load (by default, Russian and English)

     """
     from pullenti.ner.core.internal.NumberExHelper import NumberExHelper
     from pullenti.ner.core.internal.BlockLine import BlockLine
     from pullenti.ner.core.internal.NounPhraseItem import NounPhraseItem
     from pullenti.ner.core.PrepositionHelper import PrepositionHelper
     from pullenti.ner.core.ConjunctionHelper import ConjunctionHelper
     if ProcessorService.__m_inited:
         return
     ProcessorService.__m_inited = True
     MorphologyService.initialize(lang)
     DerivateService.initialize(lang)
     # The flag is switched on only while the helper dictionaries are being
     # built. Restore it in ``finally`` so that a failing helper cannot leave
     # the global flag stuck at True for the rest of the process.
     Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = True
     try:
         PrepositionHelper._initialize()
         ConjunctionHelper._initialize()
         NounPhraseItem._initialize()
         NumberHelper._initialize()
         NumberExHelper._initialize()
         BlockLine.initialize()
     finally:
         Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = False
Exemplo n.º 2
0
 def find_derivates(t : 'Token') -> typing.List['DerivateGroup']:
     """Find derivate groups for a token, dispatching on the token type.

     Args:
         t(Token): token (plain text token or one of the meta tokens)

     Returns:
         typing.List[DerivateGroup]: groups returned by DerivateService,
         or None when nothing suitable was found
     """
     required_class = None
     if isinstance(t, NounPhraseToken):
         # For a noun phrase look at the last token of its noun and accept
         # only word forms whose class intersects with NOUN.
         t = t.noun.end_token
         required_class = MorphClass.NOUN
     if isinstance(t, TextToken):
         for form in t.morph.items:
             if not isinstance(form, MorphWordForm):
                 continue
             if required_class is not None and ((required_class) & form.class0_).is_undefined:
                 continue
             lemma = Utils.ifNotNull(form.normal_full, form.normal_case)
             groups = DerivateService.find_derivates(lemma, True, None)
             if groups is not None and len(groups) > 0:
                 return groups
         return None
     if isinstance(t, VerbPhraseToken):
         return SemanticHelper.find_derivates(t.last_verb)
     if isinstance(t, VerbPhraseItemToken):
         verb_item = Utils.asObjectOrNull(t, VerbPhraseItemToken)
         if verb_item.verb_morph is None:
             return None
         groups = DerivateService.find_derivates(verb_item.verb_morph.normal_case, True, t.morph.language)
         # Retry with the full normal form when the first lookup gave nothing.
         needs_retry = groups is None or (
             len(groups) == 0
             and verb_item.verb_morph.normal_full is not None
             and verb_item.verb_morph.normal_case != verb_item.verb_morph.normal_full)
         if needs_retry:
             groups = DerivateService.find_derivates(verb_item.verb_morph.normal_full, True, t.morph.language)
         return groups
     if isinstance(t, NumberToken) and t.value == "1":
         return DerivateService.find_derivates("ОДИН", True, MorphLang.RU)
     if isinstance(t, MetaToken):
         return SemanticHelper.find_derivates(t.end_token)
     return None
Exemplo n.º 3
0
 def __init__(self, mt : 'MetaToken') -> None:
     """Create a sentence item wrapping the given meta token.

     The item type, the preposition and the derivate groups are filled in
     according to the concrete class of ``mt``.

     Args:
         mt(MetaToken): source token of the item
     """
     # Defaults for every field.
     self.source = None
     self.prep = None
     self.typ = SentItemType.UNDEFINED
     self.sub_typ = SentItemSubtype.UNDEFINED
     self.sub_sent = None
     self.plural = -1
     self.dr_groups = None
     self.dr_groups2 = None
     self.part_verb_typ = NGLinkType.UNDEFINED
     self.participle_coef = 1
     self.quant = None
     self.attrs = None
     self.can_be_question = False
     self.result = None
     self.result_verb_last = None
     self.__m_res_graph = None
     self.res_frag = None
     self.result_list = None
     self.result_list_or = False
     self.__m_begin_token = None
     self.__m_end_token = None

     self.source = mt
     if isinstance(mt, NounPhraseToken):
         noun_phrase = Utils.asObjectOrNull(mt, NounPhraseToken)
         self.prep = noun_phrase.preposition.normal if noun_phrase.preposition is not None else ""
         self.typ = SentItemType.NOUN
         lemma = noun_phrase.noun.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.MASCULINE, False)
         if lemma is not None:
             self.dr_groups = DerivateService.find_derivates(lemma, True, None)
     elif isinstance(mt, ReferentToken) or isinstance(mt, NumbersWithUnitToken):
         self.typ = SentItemType.NOUN
     elif isinstance(mt, AdverbToken):
         self.typ = SentItemType.ADVERB
     elif isinstance(mt, ConjunctionToken):
         self.typ = SentItemType.CONJ
     elif isinstance(mt, DelimToken):
         self.typ = SentItemType.DELIM
     elif isinstance(mt, VerbPhraseToken):
         verb_phrase = Utils.asObjectOrNull(mt, VerbPhraseToken)
         # Derivate groups of the first verb.
         lemma = None
         if verb_phrase.first_verb.verb_morph is not None:
             lemma = Utils.ifNotNull(verb_phrase.first_verb.verb_morph.normal_full, verb_phrase.first_verb.verb_morph.normal_case)
         if lemma is not None:
             self.dr_groups = DerivateService.find_derivates(lemma, True, None)
         # Derivate groups of the last verb (shared when it is the same verb).
         if verb_phrase.first_verb != verb_phrase.last_verb:
             if verb_phrase.last_verb.verb_morph is None:
                 lemma = verb_phrase.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
             else:
                 lemma = Utils.ifNotNull(verb_phrase.last_verb.verb_morph.normal_full, verb_phrase.last_verb.verb_morph.normal_case)
             self.dr_groups2 = DerivateService.find_derivates(lemma, True, None)
         else:
             self.dr_groups2 = self.dr_groups
         self.prep = verb_phrase.preposition.normal if verb_phrase.preposition is not None else ""
         self.typ = SentItemType.VERB
Exemplo n.º 4
0
 def __calc_actant(self) -> float:
     """Compute the coefficient of this link treated as a verb actant.

     The result is stored in ``self.coef`` on every path except when the
     last verb has no morphology, in which case -1 is returned directly.

     Returns:
         float: the coefficient (-1 when the link is not applicable)
     """
     if self.can_be_participle:
         self.coef = -1
         return self.coef
     verb_form = self.to_verb.last_verb.verb_morph
     if verb_form is None:
         return -1
     if self.from_prep is None:
         self.coef = 0
         return self.coef
     source_morph = self.from0_.source.source.morph
     lemma = Utils.ifNotNull(verb_form.normal_full, verb_form.normal_case)
     groups = DerivateService.find_derivates(lemma, True, None)
     if groups is not None:
         for group in groups:
             # Only groups whose control model lists our preposition matter.
             if group.cm.nexts is None or self.from_prep not in group.cm.nexts:
                 continue
             allowed_cases = group.cm.nexts[self.from_prep]
             if not ((allowed_cases) & source_morph.case_).is_undefined:
                 self.coef = SemanticService.PARAMS.next_model
                 if Utils.isNullOrEmpty(self.from_prep):
                     # Empty preposition: halve the score, and halve it once
                     # more when the source can be nominative.
                     if source_morph.case_.is_nominative:
                         self.coef /= 2
                     self.coef /= 2
                 return self.coef
             if self.from0_.source.source.morph.case_.is_undefined:
                 self.coef = 0
                 return self.coef
     self.coef = 0.1
     return self.coef
Exemplo n.º 5
0
 def create_noun_group(gr : 'SemGraph', npt : 'NounPhraseToken') -> 'SemObject':
     """Create a semantic object for a noun phrase and add it to the graph.

     Adjectives and the internal noun of the phrase are created as separate
     objects and attached to the noun object with DETAIL links.

     Args:
         gr(SemGraph): graph receiving the new objects and links
         npt(NounPhraseToken): noun phrase to convert

     Returns:
         SemObject: the object of the noun, or None when the noun is a
         ReferentToken without a referent
     """
     head = npt.noun.begin_token
     obj = SemObject(gr)
     obj.tokens.append(npt.noun)
     obj.typ = SemObjectType.NOUN
     if npt.noun.morph.class0_.is_personal_pronoun:
         obj.typ = SemObjectType.PERSONALPRONOUN
     elif npt.noun.morph.class0_.is_pronoun:
         obj.typ = SemObjectType.PRONOUN

     if npt.noun.begin_token != npt.noun.end_token:
         # Multi-token noun: take normal forms and morphology from the phrase.
         obj.morph.normal_case = npt.noun.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
         obj.morph.normal_full = npt.noun.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
         obj.morph.class0_ = MorphClass.NOUN
         obj.morph.number = npt.morph.number
         obj.morph.gender = npt.morph.gender
         obj.morph.case_ = npt.morph.case_
     elif isinstance(head, TextToken):
         # Use the first word form that agrees with the phrase morphology.
         for form in head.morph.items:
             if form.check_accord(npt.morph, False, False) and isinstance(form, MorphWordForm):
                 CreateHelper._set_morph(obj, Utils.asObjectOrNull(form, MorphWordForm))
                 break
         if obj.morph.normal_case is None:
             obj.morph.normal_case = head.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
             obj.morph.normal_full = head.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
         groups = DerivateService.find_derivates(obj.morph.normal_full, True, None)
         if groups is not None and len(groups) > 0:
             obj.concept = groups[0]
     elif isinstance(head, ReferentToken):
         referent = head.referent
         if referent is None:
             return None
         obj.morph.normal_case = str(referent)
         obj.morph.normal_full = obj.morph.normal_case
         obj.concept = referent
     elif isinstance(head, NumberToken):
         number = Utils.asObjectOrNull(head, NumberToken)
         obj.morph.gender = head.morph.gender
         obj.morph.number = head.morph.number
         if number.int_value is None:
             obj.morph.normal_case = head.get_source_text().upper()
             obj.morph.normal_full = obj.morph.normal_case
         else:
             obj.morph.normal_case = NumberHelper.get_number_adjective(number.int_value, head.morph.gender, head.morph.number)
             obj.morph.normal_full = NumberHelper.get_number_adjective(number.int_value, MorphGender.MASCULINE, MorphNumber.SINGULAR)
     head.tag = obj

     for adj in npt.adjectives:
         # With several nouns in the phrase only the first adjective is used.
         if npt.multi_nouns and adj != npt.adjectives[0]:
             break
         adj_obj = CreateHelper.create_npt_adj(gr, npt, adj)
         if adj_obj is not None:
             gr.add_link(SemLinkType.DETAIL, obj, adj_obj, "какой", False, None)

     if npt.internal_noun is not None:
         inner_obj = CreateHelper.create_noun_group(gr, npt.internal_noun)
         if inner_obj is not None:
             gr.add_link(SemLinkType.DETAIL, obj, inner_obj, None, False, None)
     gr.objects.append(obj)
     return obj
Exemplo n.º 6
0
 def create_adverb(gr : 'SemGraph', adv : 'AdverbToken') -> 'SemObject':
     """Create an ADVERB semantic object for an adverb token.

     The object is appended to ``gr.objects``; its concept is the first
     derivate group found for the adverb spelling, if any.

     Args:
         gr(SemGraph): graph receiving the new object
         adv(AdverbToken): adverb token to convert

     Returns:
         SemObject: the created object
     """
     adverb_obj = SemObject(gr)
     gr.objects.append(adverb_obj)
     adverb_obj.tokens.append(adv)
     adverb_obj.typ = SemObjectType.ADVERB
     adverb_obj.not0_ = adv.not0_
     # Both normal forms are the spelling of the adverb.
     adverb_obj.morph.normal_full = adv.spelling
     adverb_obj.morph.normal_case = adverb_obj.morph.normal_full
     groups = DerivateService.find_derivates(adverb_obj.morph.normal_full, True, None)
     if groups is not None and len(groups) > 0:
         adverb_obj.concept = groups[0]
     return adverb_obj
Exemplo n.º 7
0
 def create_npt_adj(gr : 'SemGraph', npt : 'NounPhraseToken', a : 'MetaToken') -> 'SemObject':
     """Create a semantic object for an adjective-position item of a noun phrase.

     Pronouns become PRONOUN / PERSONALPRONOUN objects, other non-verb items
     become ADJECTIVE objects. Items whose class is a verb are not converted.

     Args:
         gr(SemGraph): graph receiving the new object
         npt(NounPhraseToken): noun phrase that owns the item
         a(MetaToken): the adjective-position item

     Returns:
         SemObject: the created object, or None for verb-class items
     """
     if a.morph.class0_.is_pronoun:
         pron_obj = SemObject(gr)
         gr.objects.append(pron_obj)
         pron_obj.tokens.append(a)
         if a.begin_token.morph.class0_.is_personal_pronoun:
             pron_obj.typ = SemObjectType.PERSONALPRONOUN
         else:
             pron_obj.typ = SemObjectType.PRONOUN
         for item in a.begin_token.morph.items:
             form = Utils.asObjectOrNull(item, MorphWordForm)
             if form is None:
                 continue
             # Skip forms whose case contradicts the case of the phrase.
             if not npt.morph.case_.is_undefined and ((npt.morph.case_) & form.case_).is_undefined:
                 continue
             CreateHelper._set_morph(pron_obj, form)
             if pron_obj.morph.normal_full == "КАКОВ":
                 pron_obj.morph.normal_full = "КАКОЙ"
             break
         if pron_obj.morph.normal_full is None:
             pron_obj.morph.normal_case = a.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
             pron_obj.morph.normal_full = pron_obj.morph.normal_case
         return pron_obj

     if a.morph.class0_.is_verb:
         return None

     adj_obj = SemObject(gr)
     gr.objects.append(adj_obj)
     adj_obj.tokens.append(a)
     adj_obj.typ = SemObjectType.ADJECTIVE
     # Take the first adjective word form that agrees with the phrase.
     for form in a.begin_token.morph.items:
         if form.check_accord(npt.morph, False, False) and form.class0_.is_adjective and isinstance(form, MorphWordForm):
             CreateHelper._set_morph(adj_obj, Utils.asObjectOrNull(form, MorphWordForm))
             break
     if adj_obj.morph.normal_case is None:
         adj_obj.morph.normal_case = a.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
         adj_obj.morph.normal_full = a.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, MorphGender.MASCULINE, False)
         CreateHelper._set_morph0(adj_obj, a.begin_token.morph)
     groups = DerivateService.find_derivates(adj_obj.morph.normal_full, True, None)
     if groups is not None and len(groups) > 0:
         adj_obj.concept = groups[0]
     return adj_obj
Exemplo n.º 8
0
 def create_verb_group(gr : 'SemGraph', vpt : 'VerbPhraseToken') -> 'SemObject':
     """Create semantic objects for the items of a verb phrase.

     Every non-adverb item that is not skipped becomes a VERB or PARTICIPLE
     object appended to ``gr.objects``. Adverbs preceding a verb are attached
     to it (as attributes or as DETAIL-linked adverb objects); trailing ones
     are attached to the last verb. Consecutive verb objects are chained with
     DETAIL links.

     Args:
         gr(SemGraph): graph receiving the new objects and links
         vpt(VerbPhraseToken): verb phrase to convert

     Returns:
         SemObject: the object of the first verb, or None when no verb
         object was created
     """
     sems = list()
     # attrs / adverbs collect adverb information seen since the last verb.
     attrs = list()
     adverbs = list()
     i = 0
     # Emulation of a C-style "for (i = 0; i < len; i++)" loop that allows
     # "continue" to still advance the index.
     first_pass3439 = True
     while True:
         if first_pass3439: first_pass3439 = False
         else: i += 1
         if (not (i < len(vpt.items))): break
         v = vpt.items[i]
         if (v.is_adverb): 
             adv = AdverbToken.try_parse(v.begin_token)
             if (adv is None): 
                 continue
             # Adverbs with a defined attribute type become attributes,
             # all the others become separate adverb objects.
             if (adv.typ != SemAttributeType.UNDEFINED): 
                 attrs.append(SemAttribute._new2912(adv.not0_, adv.typ, adv.spelling))
                 continue
             adverb = CreateHelper.create_adverb(gr, adv)
             if (len(attrs) > 0): 
                 adverb.attrs.extend(attrs)
                 attrs.clear()
             adverbs.append(adverb)
             continue
         if (v.normal == "БЫТЬ"): 
             # Skip "БЫТЬ" when some non-adverb item follows it in the phrase.
             j = 0
             j = (i + 1)
             while j < len(vpt.items): 
                 if (not vpt.items[j].is_adverb): 
                     break
                 j += 1
             if (j < len(vpt.items)): 
                 continue
         sem = SemObject(gr)
         gr.objects.append(sem)
         sem.tokens.append(v)
         v.tag = (sem)
         CreateHelper._set_morph(sem, v.verb_morph)
         sem.morph.normal_full = v.normal
         sem.morph.normal_case = sem.morph.normal_full
         if (v.is_participle or v.is_dee_participle): 
             sem.typ = SemObjectType.PARTICIPLE
             # normal_full is the VERB normalization of the last token (falling
             # back to v.normal), normal_case is its ADJECTIVE normalization.
             sem.morph.normal_full = (Utils.ifNotNull(v.end_token.get_normal_case_text(MorphClass.VERB, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False), (sem.morph.normal_case if sem is not None and sem.morph is not None else None)))
             sem.morph.normal_case = v.end_token.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
             if (sem.morph.normal_case == sem.morph.normal_full and v.normal.endswith("Й")): 
                 # Both forms coincide and look adjectival: take the verb
                 # spelling from the derivate groups as normal_full.
                 grs2 = DerivateService.find_derivates(v.normal, True, None)
                 if (grs2 is not None): 
                     for g in grs2: 
                         for w in g.words: 
                             if (w.lang == v.end_token.morph.language and w.class0_.is_verb and not w.class0_.is_adjective): 
                                 sem.morph.normal_full = w.spelling
                                 break
             elif (sem.morph.normal_case == sem.morph.normal_full and v.is_participle and sem.morph.normal_full.endswith("Ь")): 
                 # Both forms coincide and look like an infinitive: look for an
                 # adjectival form (ending with "Й") to use as normal_case.
                 for it in v.end_token.morph.items: 
                     wf = Utils.asObjectOrNull(it, MorphWordForm)
                     if (wf is None): 
                         continue
                     if (wf.normal_case.endswith("Й") or ((wf.normal_full is not None and wf.normal_full.endswith("Й")))): 
                         sem.morph.normal_case = (Utils.ifNotNull(wf.normal_full, wf.normal_case))
                         break
                 if (sem.morph.normal_case == sem.morph.normal_full): 
                     grs2 = DerivateService.find_derivates(sem.morph.normal_case, True, None)
                     if (grs2 is not None): 
                         for g in grs2: 
                             for w in g.words: 
                                 if (w.lang == v.end_token.morph.language and w.class0_.is_verb and w.class0_.is_adjective): 
                                     sem.morph.normal_case = w.spelling
                                     break
                             # Only the first group is inspected here.
                             break
         else: 
             sem.typ = SemObjectType.VERB
         if (v.verb_morph is not None and v.verb_morph.contains_attr("возвр.", None)): 
             # For forms marked "возвр." drop the trailing "СЯ" / "СЬ".
             if (sem.morph.normal_full.endswith("СЯ") or sem.morph.normal_full.endswith("СЬ")): 
                 sem.morph.normal_full = sem.morph.normal_full[0:0+len(sem.morph.normal_full) - 2]
         grs = DerivateService.find_derivates(sem.morph.normal_full, True, None)
         if (grs is not None and len(grs) > 0): 
             sem.concept = (grs[0])
             if (v.verb_morph is not None and v.verb_morph.misc.aspect == MorphAspect.IMPERFECTIVE): 
                 # For an imperfective verb use the first perfective verb of the
                 # first group as normal_full.
                 for w in grs[0].words: 
                     if (w.class0_.is_verb and not w.class0_.is_adjective): 
                         if (w.aspect == MorphAspect.PERFECTIVE): 
                             sem.morph.normal_full = w.spelling
                             break
         sem.not0_ = v.not0_
         sems.append(sem)
         # Attach the adverb information collected before this verb.
         if (len(attrs) > 0): 
             sem.attrs.extend(attrs)
             attrs.clear()
         if (len(adverbs) > 0): 
             for a in adverbs: 
                 gr.add_link(SemLinkType.DETAIL, sem, a, "как", False, None)
         adverbs.clear()
     if (len(sems) == 0): 
         return None
     # Adverb information left after the last verb goes to the last verb.
     if (len(attrs) > 0): 
         sems[len(sems) - 1].attrs.extend(attrs)
     if (len(adverbs) > 0): 
         sem = sems[len(sems) - 1]
         for a in adverbs: 
             gr.add_link(SemLinkType.DETAIL, sem, a, "как", False, None)
     # Chain the verbs: each verb is linked to the next one, last pair first.
     for i in range(len(sems) - 1, 0, -1):
         gr.add_link(SemLinkType.DETAIL, sems[i - 1], sems[i], "что делать", False, None)
     return sems[0]
Exemplo n.º 9
0
 def process(self, kit : 'AnalysisKit') -> None:
     """Extract keyword referents from the text of the analysis kit.

     First pass: verbs become PREDICATE keywords, the words of noun phrases
     become OBJECT keywords, and a noun phrase of several keywords also gets
     a combined OBJECT keyword. Second pass: pairs of adjacent OBJECT
     keywords are united. Finally the referents are optionally sorted by
     rank and an annotation is optionally registered.

     Args:
         kit(AnalysisKit): container with the token chain being processed
     """
     # Main extraction function.
     # NOTE(review): the original comment said "extraction of phone numbers",
     # which looks copy-pasted - the code below registers KeywordReferent objects.
     ad = kit.get_analyzer_data(self)
     # Run a DenominationAnalyzer here unless an enabled one is already
     # among the analyzers of the processor.
     has_denoms = False
     for a in kit.processor.analyzers: 
         if ((isinstance(a, DenominationAnalyzer)) and not a.ignore_this_analyzer): 
             has_denoms = True
     if (not has_denoms): 
         a = DenominationAnalyzer()
         a.process(kit)
     li = list()
     tmp = io.StringIO()
     tmp2 = list()
     # max0_ is the number of tokens in the chain; together with the running
     # index cur it is passed to the rank calculation.
     max0_ = 0
     t = kit.first_token
     while t is not None: 
         max0_ += 1
         t = t.next0_
     cur = 0
     t = kit.first_token
     # Emulation of a C-style for loop so that "continue" still advances t and cur.
     first_pass3292 = True
     while True:
         if first_pass3292: first_pass3292 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         r = t.get_referent()
         if (r is not None): 
             # Tokens that already carry a referent are handled separately.
             t = self.__add_referents(ad, t, cur, max0_)
             continue
         if (not (isinstance(t, TextToken))): 
             continue
         # Only letter tokens of at least 3 characters are considered.
         if (not t.chars.is_letter or (t.length_char < 3)): 
             continue
         term = t.term
         if (term == "ЕСТЬ"): 
             # "ЕСТЬ" is kept only when the previous text token is a verb.
             if ((isinstance(t.previous, TextToken)) and t.previous.morph.class0_.is_verb): 
                 pass
             else: 
                 continue
         npt = None
         npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.ADJECTIVECANBELAST) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
         if (npt is None): 
             # Not a noun phrase: a dictionary verb becomes a PREDICATE keyword.
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_verb and not mc.is_preposition): 
                 if (t.is_verb_be): 
                     continue
                 if (t.is_value("МОЧЬ", None) or t.is_value("WOULD", None)): 
                     continue
                 kref = KeywordReferent._new1595(KeywordType.PREDICATE)
                 norm = t.get_normal_case_text(MorphClass.VERB, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                 if (norm is None): 
                     norm = t.lemma
                 # "...ЬСЯ" -> "...Ь": drop the trailing "СЯ".
                 if (norm.endswith("ЬСЯ")): 
                     norm = norm[0:0+len(norm) - 2]
                 kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
                 drv = DerivateService.find_derivates(norm, True, t.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
                 kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
                 KeywordAnalyzer.__set_rank(kref, cur, max0_)
                 # NOTE(review): register_referent is called a second time here on
                 # the already registered kref - confirm this is intended.
                 rt1 = ReferentToken._new734(ad.register_referent(kref), t, t, t.morph)
                 kit.embed_token(rt1)
                 t = (rt1)
                 continue
             continue
         if (npt.internal_noun is not None): 
             continue
         # Skip some prepositional phrases with these end words entirely.
         if (npt.end_token.is_value("ЦЕЛОМ", None) or npt.end_token.is_value("ЧАСТНОСТИ", None)): 
             if (npt.preposition is not None): 
                 t = npt.end_token
                 continue
         if (npt.end_token.is_value("СТОРОНЫ", None) and npt.preposition is not None and npt.preposition.normal == "С"): 
             t = npt.end_token
             continue
         if (npt.begin_token == npt.end_token): 
             # Single-token phrase: skip prepositions and the adverb "ПОТОМ".
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_preposition): 
                 continue
             elif (mc.is_adverb): 
                 if (t.is_value("ПОТОМ", None)): 
                     continue
         else: 
             pass
         # li collects the keywords created for the words of this noun phrase.
         li.clear()
         t0 = t
         tt = t
         first_pass3293 = True
         while True:
             if first_pass3293: first_pass3293 = False
             else: tt = tt.next0_
             if (not (tt is not None and tt.end_char <= npt.end_char)): break
             if (not (isinstance(tt, TextToken))): 
                 continue
             # No-op branch; looks like a leftover debugging hook.
             if (tt.is_value("NATURAL", None)): 
                 pass
             if ((tt.length_char < 3) or not tt.chars.is_letter): 
                 continue
             mc = tt.get_morph_class_in_dictionary()
             # Function words are skipped, with "ОТНОШЕНИЕ" as an exception.
             if ((mc.is_preposition or mc.is_pronoun or mc.is_personal_pronoun) or mc.is_conjunction): 
                 if (tt.is_value("ОТНОШЕНИЕ", None)): 
                     pass
                 else: 
                     continue
             if (mc.is_misc): 
                 if (MiscHelper.is_eng_article(tt)): 
                     continue
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             norm = tt.lemma
             kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
             if (norm != "ЕСТЬ"): 
                 drv = DerivateService.find_derivates(norm, True, tt.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, tt, tt, tt.morph)
             kit.embed_token(rt1)
             # Remember the first embedded token as the start of the combined keyword.
             if (tt == t and len(li) == 0): 
                 t0 = (rt1)
             t = (rt1)
             li.append(kref)
         if (len(li) > 1): 
             # Several keywords in one noun phrase: build a combined keyword
             # that references each of them.
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             Utils.setLengthStringIO(tmp, 0)
             tmp2.clear()
             has_norm = False
             for kw in li: 
                 s = kw.get_string_value(KeywordReferent.ATTR_VALUE)
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
                 n = kw.get_string_value(KeywordReferent.ATTR_NORMAL)
                 if (n is not None): 
                     has_norm = True
                     tmp2.append(n)
                 else: 
                     tmp2.append(s)
                 kref.add_slot(KeywordReferent.ATTR_REF, kw, False, 0)
             val = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
             kref.add_slot(KeywordReferent.ATTR_VALUE, val, False, 0)
             # The normal form is the sorted, space-joined list of the parts' normals.
             Utils.setLengthStringIO(tmp, 0)
             tmp2.sort()
             for s in tmp2: 
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
             norm = Utils.toStringStringIO(tmp)
             if (norm != val): 
                 kref.add_slot(KeywordReferent.ATTR_NORMAL, norm, False, 0)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, t0, t, npt.morph)
             kit.embed_token(rt1)
             t = (rt1)
     # Second pass: unite an OBJECT keyword with the OBJECT keyword that
     # follows it (after "OF" with an optional article, or in the genitive).
     cur = 0
     t = kit.first_token
     first_pass3294 = True
     while True:
         if first_pass3294: first_pass3294 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         kw = Utils.asObjectOrNull(t.get_referent(), KeywordReferent)
         if (kw is None or kw.typ != KeywordType.OBJECT): 
             continue
         if (t.next0_ is None or kw.child_words > 2): 
             continue
         t1 = t.next0_
         if (t1.is_value("OF", None) and (t1.whitespaces_after_count < 3) and t1.next0_ is not None): 
             t1 = t1.next0_
             if ((isinstance(t1, TextToken)) and MiscHelper.is_eng_article(t1) and t1.next0_ is not None): 
                 t1 = t1.next0_
         elif (not t1.morph.case_.is_genitive or t.whitespaces_after_count > 1): 
             continue
         kw2 = Utils.asObjectOrNull(t1.get_referent(), KeywordReferent)
         if (kw2 is None): 
             continue
         if (kw == kw2): 
             continue
         # The united keyword may consist of at most 3 child words.
         if (kw2.typ != KeywordType.OBJECT or (kw.child_words + kw2.child_words) > 3): 
             continue
         kw_un = KeywordReferent()
         kw_un._union(kw, kw2, MiscHelper.get_text_value(t1, t1, GetTextAttr.NO))
         kw_un = (Utils.asObjectOrNull(ad.register_referent(kw_un), KeywordReferent))
         KeywordAnalyzer.__set_rank(kw_un, cur, max0_)
         rt1 = ReferentToken._new734(kw_un, t, t1, t.morph)
         kit.embed_token(rt1)
         t = (rt1)
     if (KeywordAnalyzer.SORT_KEYWORDS_BY_RANK): 
         # Highest rank first.
         all0_ = list(ad.referents)
         all0_.sort(key=operator.attrgetter('rank'), reverse=True)
         ad.referents = all0_
     if (KeywordAnalyzer.ANNOTATION_MAX_SENTENCES > 0): 
         ano = AutoannoSentToken.create_annotation(kit, KeywordAnalyzer.ANNOTATION_MAX_SENTENCES)
         if (ano is not None): 
             ad.register_referent(ano)
Exemplo n.º 10
0
 def __calc_pacient(self, noplural: bool) -> float:
     """Compute the coefficient of this link treated as the verb's patient.

     On most paths the result is also stored in ``self.coef``; several
     rejecting paths return -1 directly without touching ``self.coef``.

     Args:
         noplural(bool): when True, a plural verb additionally requires
             number/case agreement with the source (unless the source
             itself is marked as plural)

     Returns:
         float: the coefficient (-1 when the link is not applicable)
     """
     if not Utils.isNullOrEmpty(self.from_prep):
         self.coef = -1
         return self.coef
     first_form = self.to_verb.first_verb.verb_morph
     if first_form is None:
         return -1
     last_form = self.to_verb.last_verb.verb_morph
     if last_form is None:
         return -1
     morph_ = self.from_morph

     is_passive = (last_form.misc.voice == MorphVoice.PASSIVE
                   or self.to_verb.last_verb.morph.contains_attr("страд.з.", None))
     if is_passive:
         if first_form.number == MorphNumber.PLURAL:
             if noplural and not self.from_is_plural:
                 if not NGLink.__check_morph_accord(morph_, False, first_form):
                     return -1
                 if len(morph_.items) > 0 and not first_form.case_.is_undefined:
                     # At least one plural item must be compatible with the
                     # case of the verb form.
                     has_plural_match = any(
                         ((item.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)
                         and not (not item.case_.is_undefined
                                  and ((item.case_) & first_form.case_).is_undefined)
                         for item in morph_.items)
                     if not has_plural_match:
                         self.coef = -1
                         return self.coef
             self.coef = SemanticService.PARAMS.verb_plural
             self.plural = 1
             return self.coef
         if first_form.number == MorphNumber.SINGULAR:
             self.plural = 0
             if self.from_is_plural:
                 return -1
         if not NGLink.__check_morph_accord(morph_, False, first_form):
             return -1
         self.coef = SemanticService.PARAMS.morph_accord
         return self.coef

     # Collect control-model facts about the last verb.
     is_trans = False
     is_ref_dative = False
     groups = DerivateService.find_derivates(
         Utils.ifNotNull(last_form.normal_full, last_form.normal_case), True, None)
     if groups is not None:
         for group in groups:
             if group.cm.transitive:
                 is_trans = True
             if (group.cm_rev.agent is not None
                     and not group.cm_rev.agent.case_.is_nominative):
                 is_ref_dative = True

     if NGLink.__is_rev_verb(last_form):
         # Same condition as the guard at the top; kept as in the original.
         if not Utils.isNullOrEmpty(self.from_prep):
             return -1
         if morph_.case_.is_undefined:
             self.coef = 0
             return self.coef
         if is_ref_dative:
             case_fits = morph_.case_.is_nominative
         else:
             case_fits = morph_.case_.is_instrumental
         if case_fits:
             self.coef = SemanticService.PARAMS.transitive_coef
             return self.coef
         return -1

     if last_form != first_form and not is_trans:
         # The last verb is not transitive: check the first verb as well.
         groups = DerivateService.find_derivates(
             Utils.ifNotNull(first_form.normal_full, first_form.normal_case), True, None)
         if groups is not None:
             for group in groups:
                 if group.cm.transitive:
                     is_trans = True

     if is_trans:
         if not Utils.isNullOrEmpty(self.from_prep):
             return -1
         if not morph_.case_.is_undefined:
             if not morph_.case_.is_accusative:
                 return -1
             self.coef = SemanticService.PARAMS.transitive_coef
             # Every additional possible case halves the score.
             if morph_.case_.is_dative:
                 self.coef /= 2
             if morph_.case_.is_genitive:
                 self.coef /= 2
             if morph_.case_.is_instrumental:
                 self.coef /= 2
             return self.coef

     if last_form.normal_case == "БЫТЬ":
         if not Utils.isNullOrEmpty(self.from_prep):
             return -1
         if morph_.case_.is_instrumental:
             self.coef = SemanticService.PARAMS.transitive_coef
             return self.coef
         if morph_.case_.is_nominative:
             # Full score only when the source starts after the verb phrase.
             starts_after_verb = (self.from0_.source.begin_token.begin_char >
                                  self.to_verb.end_char)
             if starts_after_verb:
                 self.coef = SemanticService.PARAMS.transitive_coef
             else:
                 self.coef = SemanticService.PARAMS.transitive_coef / 2
             return self.coef
         if morph_.case_.is_undefined:
             self.coef = SemanticService.PARAMS.transitive_coef / 2
             return self.coef
     return -1
Exemplo n.º 11
0
 def __calc_agent(self, noplural: bool) -> float:
     """ Compute the coefficient of this link when the source is taken as the agent of the verb.

     The verdict is always written to self.coef and that value is returned.
     Every rejecting branch stores -1; the branches that cannot decide
     (undefined source case with a passive or reflexive verb, or a source that is
     both instrumental and nominative for an instrumental-agent reflexive verb) store 0.
     self.plural is set to 1 when the first verb form is plural and to 0 when it is singular.

     Args:
         noplural(bool): when True and the first verb form is plural, a source that is
             not flagged by self.from_is_plural must additionally pass the
             plural-number, morphological-accord and per-form case checks

     Returns:
         float: the value left in self.coef
     """
     def _settle(value):
         # Store the verdict and hand back whatever self.coef now holds.
         self.coef = value
         return self.coef

     # A source introduced by a preposition is rejected outright.
     if (not Utils.isNullOrEmpty(self.from_prep)):
         return _settle(-1)
     head_form = self.to_verb.first_verb.verb_morph
     if (head_form is None):
         return _settle(-1)
     tail_form = self.to_verb.last_verb.verb_morph
     if (tail_form is None):
         return _settle(-1)
     if (head_form.misc.mood == MorphMood.IMPERATIVE):
         return _settle(-1)
     src = self.from_morph

     # Passive last verb: only an instrumental source is accepted.
     passive = (tail_form.misc.voice == MorphVoice.PASSIVE
                or self.to_verb.last_verb.morph.contains_attr(
                    "страд.з.", None))
     if (passive):
         if (src.case_.is_undefined):
             return _settle(0)
         if (not src.case_.is_instrumental):
             return _settle(-1)
         self.coef = SemanticService.PARAMS.transitive_coef
         if (tail_form.case_.is_instrumental):
             self.coef /= (2)
         return self.coef

     # An infinitive first verb is rejected.
     if ("инф." in head_form.misc.attrs):
         return _settle(-1)

     if (NGLink.__is_rev_verb(tail_form)):
         # Take the agent case from the first derivate group that declares one.
         agent_case = MorphCase.UNDEFINED
         groups = DerivateService.find_derivates(
             Utils.ifNotNull(tail_form.normal_full, tail_form.normal_case),
             True, None)
         if (groups is not None):
             for group in groups:
                 if (group.cm_rev.agent is not None):
                     agent_case = group.cm_rev.agent.case_
                     break
         if (src.case_.is_undefined):
             return _settle(0)
         if (agent_case.is_dative):
             if (not src.case_.is_dative):
                 return _settle(-1)
             self.coef = SemanticService.PARAMS.transitive_coef
             if (src.case_.is_genitive):
                 self.coef /= (2)
             return self.coef
         if (agent_case.is_instrumental):
             if (not src.case_.is_instrumental):
                 return _settle(-1)
             if (src.case_.is_nominative):
                 return _settle(0)
             return _settle(SemanticService.PARAMS.transitive_coef)
         if (not src.case_.is_nominative):
             return _settle(-1)
         # A nominative source falls through to the number-based checks below.

     if (head_form.number == MorphNumber.PLURAL):
         if (not src.case_.is_undefined):
             # With no case on the verb form the source must be nominative,
             # otherwise the two case sets must intersect.
             case_clash = ((not src.case_.is_nominative)
                           if head_form.case_.is_undefined else
                           ((head_form.case_) & src.case_).is_undefined)
             if (case_clash):
                 return _settle(-1)
         if (noplural and not self.from_is_plural):
             if (((src.number) &
                  (MorphNumber.PLURAL)) == (MorphNumber.UNDEFINED)):
                 return _settle(-1)
             if (not NGLink.__check_morph_accord(src, False, head_form)):
                 return _settle(-1)
             if (len(src.items) > 0 and not head_form.case_.is_undefined):
                 # Need at least one plural form whose case is undefined
                 # or intersects the case of the verb form.
                 for form in src.items:
                     if (((form.number) &
                          (MorphNumber.PLURAL)) != (MorphNumber.PLURAL)):
                         continue
                     if (not form.case_.is_undefined
                             and ((form.case_) &
                                  head_form.case_).is_undefined):
                         continue
                     break
                 else:
                     return _settle(-1)
         self.plural = 1
         self.coef = SemanticService.PARAMS.verb_plural
         # Halve the coefficient for "БЫТЬ" when a case-less source starts after the verb phrase ends.
         if (tail_form.normal_case == "БЫТЬ" and src.case_.is_undefined
                 and self.from0_.source.begin_token.begin_char >
                 self.to_verb.end_char):
             self.coef /= (2)
         return self.coef

     # The first verb form is not plural.
     if (head_form.number == MorphNumber.SINGULAR):
         self.plural = 0
         if (self.from_is_plural):
             return _settle(-1)
     if (not NGLink.__check_morph_accord(src, False, head_form)):
         return _settle(-1)
     # A defined non-nominative case is tolerated only when the first verb is a participle.
     if (not src.case_.is_undefined and not src.case_.is_nominative
             and not self.to_verb.first_verb.is_participle):
         return _settle(-1)
     person = head_form.misc.person
     if (person != MorphPerson.UNDEFINED
             and ((person) &
                  (MorphPerson.THIRD)) == (MorphPerson.UNDEFINED)):
         # A verb without the third person requires the matching person attribute on the source.
         if (((person) & (MorphPerson.FIRST)) == (MorphPerson.FIRST)
                 and not src.contains_attr("1 л.", None)):
             return _settle(-1)
         if (((person) & (MorphPerson.SECOND)) == (MorphPerson.SECOND)
                 and not src.contains_attr("2 л.", None)):
             return _settle(-1)
     self.coef = SemanticService.PARAMS.morph_accord
     if (src.case_.is_undefined):
         self.coef /= (4)
     return self.coef
Exemplo n.º 12
0
 def __try_parse_ru(t: 'Token', can_be_partition: bool,
                    can_be_adj_partition: bool,
                    force_parse: bool) -> 'VerbPhraseToken':
     """ Try to build a verb phrase from the run of text tokens starting at t.

     Tokens are scanned left to right while each is a TextToken. Every token is
     given a kind ``ty``: 1 - verb, 2 - adverb, 3 - adjective treated as a
     verb-like item (participle), 0 - does not belong to the phrase (scan stops).
     The particle "НЕ" is not stored as an item of its own: it is remembered and
     attached to the next accepted item (its begin_token is moved back and its
     not0_ flag is set). Trailing adverb items are removed from the result.

     NOTE(review): the function takes no self, so it is presumably a static
     method whose decorator is outside this view - confirm.

     Args:
         t(Token): the token to start from
         can_be_partition(bool): allow participle-like tokens (verb tokens that are also
             adjectives or carry a case) and allow a preposition at the very first token
         can_be_adj_partition(bool): allow adjectives (including short forms, "к.ф.")
             to be taken as kind 3 when derivate groups are found for them
         force_parse(bool): take a token that the dictionary marks as both verb and noun
             as a verb when no other rule in that branch applied

     Returns:
         VerbPhraseToken: the phrase, or None when no item of kind 1 or 3 was found
         or when the leading preposition does not agree in case with the first item
     """
     res = None
     t0 = t
     not0_ = None
     has_verb = False
     verb_be_before = False
     prep = None
     # Loop scaffolding: on every pass except the first, t advances to t.next0_
     # (so each `continue` below also advances); the scan ends when t is None.
     first_pass3070 = True
     while True:
         if first_pass3070: first_pass3070 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (not (isinstance(t, TextToken))):
             break
         tt = Utils.asObjectOrNull(t, TextToken)
         is_participle = False
         # Remember the negation particle; it is attached to the next accepted item.
         if (tt.term == "НЕ"):
             not0_ = t
             continue
         ty = 0
         norm = None
         mc = tt.get_morph_class_in_dictionary()
         if (tt.term == "НЕТ"):
             # "НЕТ" counts as a verb, but only if no verb has been collected yet.
             if (has_verb):
                 break
             ty = 1
         elif (tt.term == "ДОПУСТИМО"):
             ty = 3
         elif (mc.is_adverb and not mc.is_verb):
             ty = 2
         elif (tt.is_pure_verb or tt.is_verb_be):
             ty = 1
             # A second non-infinitive verb ends the phrase unless the previous
             # verb item was a form of "to be" (verb_be_before).
             if (has_verb):
                 if (not tt.morph.contains_attr("инф.", None)):
                     if (verb_be_before):
                         pass
                     else:
                         break
         elif (mc.is_verb):
             # The dictionary allows a verb reading together with other classes.
             if (mc.is_preposition or mc.is_misc or mc.is_pronoun):
                 pass
             elif (mc.is_noun):
                 # Noun/verb homonyms: accepted as a verb only for the listed terms,
                 # for non-lowercase words that cannot start a sentence, for
                 # adjective readings when participles are allowed, or when forced.
                 if (tt.term == "СТАЛИ" or tt.term == "СТЕКЛО"
                         or tt.term == "БЫЛИ"):
                     ty = 1
                 elif (not tt.chars.is_all_lower
                       and not MiscHelper.can_be_start_of_sentence(tt)):
                     ty = 1
                 elif (mc.is_adjective and can_be_partition):
                     ty = 1
                 elif (force_parse):
                     ty = 1
             elif (mc.is_proper):
                 if (tt.chars.is_all_lower):
                     ty = 1
             else:
                 ty = 1
             # An adjective reading or a defined case marks the token as a participle.
             if (mc.is_adjective):
                 is_participle = True
             if (not tt.morph.case_.is_undefined):
                 is_participle = True
             if (not can_be_partition and is_participle):
                 break
             # After a verb, only infinitives and non-participles may continue the phrase.
             if (has_verb):
                 if (tt.morph.contains_attr("инф.", None)):
                     pass
                 elif (not is_participle):
                     pass
                 else:
                     break
         elif ((mc.is_adjective and tt.morph.contains_attr("к.ф.", None)
                and tt.term.endswith("О")) and NounPhraseHelper.try_parse(
                    tt, NounPhraseParseAttr.NO, 0, None) is None):
             # Short-form adjective ending in "О" that does not start a noun phrase
             # is treated as an adverb.
             ty = 2
         elif (mc.is_adjective
               and ((can_be_partition or can_be_adj_partition))):
             if (tt.morph.contains_attr("к.ф.", None)
                     and not can_be_adj_partition):
                 break
             norm = tt.get_normal_case_text(MorphClass.ADJECTIVE,
                                            MorphNumber.SINGULAR,
                                            MorphGender.MASCULINE, False)
             # Forms ending in "ЙШИЙ" are never accepted here
             # (presumably superlative adjectives - TODO confirm).
             if (norm.endswith("ЙШИЙ")):
                 pass
             else:
                 grs = DerivateService.find_derivates(norm, True, None)
                 if (grs is not None and len(grs) > 0):
                     # hpart: a group word that is both adjective and verb and is spelled
                     # exactly as norm; hverb: a group word that is a verb but not such
                     # an adjective+verb word.
                     hverb = False
                     hpart = False
                     for gr in grs:
                         for w in gr.words:
                             if (w.class0_.is_adjective
                                     and w.class0_.is_verb):
                                 if (w.spelling == norm):
                                     hpart = True
                             elif (w.class0_.is_verb):
                                 hverb = True
                     if (hpart and hverb):
                         ty = 3
                     elif (can_be_adj_partition):
                         ty = 3
                     # Still undecided: retry the same check with the first group's
                     # prefix stripped from the normal form.
                     if (ty != 3 and not Utils.isNullOrEmpty(grs[0].prefix)
                             and norm.startswith(grs[0].prefix)):
                         hverb = False
                         hpart = False
                         norm1 = norm[len(grs[0].prefix):]
                         grs = DerivateService.find_derivates(
                             norm1, True, None)
                         if (grs is not None and len(grs) > 0):
                             for gr in grs:
                                 for w in gr.words:
                                     if (w.class0_.is_adjective
                                             and w.class0_.is_verb):
                                         if (w.spelling == norm1):
                                             hpart = True
                                     elif (w.class0_.is_verb):
                                         hverb = True
                         if (hpart and hverb):
                             ty = 3
         # Only at the very first token: skip over a leading preposition
         # (t jumps to its end token, the loop header then moves to the next one).
         if (ty == 0 and t == t0 and can_be_partition):
             prep = PrepositionHelper.try_parse(t)
             if (prep is not None):
                 t = prep.end_token
                 continue
         if (ty == 0):
             break
         # The token is accepted: create the phrase on first use and extend it.
         if (res is None):
             res = VerbPhraseToken(t0, t)
         res.end_token = t
         it = VerbPhraseItemToken._new603(t, t, MorphCollection(t.morph))
         if (not0_ is not None):
             it.begin_token = not0_
             it.not0_ = True
             not0_ = (None)
         it.is_adverb = ty == 2
         # For the first item after a preposition, the case required by the preposition
         # must intersect the token's case; otherwise the whole parse fails.
         if (prep is not None and not t.morph.case_.is_undefined
                 and len(res.items) == 0):
             if (((prep.next_case) & t.morph.case_).is_undefined):
                 return None
             it.morph.remove_items(prep.next_case, False)
             res.preposition = prep
         if (norm is None):
             # Normal form is requested for the class that matches the kind:
             # ADJECTIVE for 3, ADVERB for 2, VERB otherwise.
             norm = t.get_normal_case_text(
                 (MorphClass.ADJECTIVE if ty == 3 else
                  (MorphClass.ADVERB if ty == 2 else MorphClass.VERB)),
                 MorphNumber.SINGULAR, MorphGender.MASCULINE, False)
             if (ty == 1 and not tt.morph.case_.is_undefined):
                 # A verb token with a case: ask the morphology service for the
                 # nominative singular masculine form, reusing misc of the first word form.
                 mi = MorphWordForm._new604(MorphCase.NOMINATIVE,
                                            MorphNumber.SINGULAR,
                                            MorphGender.MASCULINE)
                 for mit in tt.morph.items:
                     if (isinstance(mit, MorphWordForm)):
                         mi.misc = mit.misc
                         break
                 # The term is sent with a two-character "КК" prefix which is cut off
                 # the answer again. NOTE(review): the purpose of the prefix is not
                 # visible here - confirm against MorphologyService.get_wordform.
                 nnn = MorphologyService.get_wordform("КК" + t.term, mi)
                 if (nnn is not None):
                     norm = nnn[2:]
         it.normal = norm
         res.items.append(it)
         # The morphology of the whole phrase comes from its first verb-like item.
         if (not has_verb and ((ty == 1 or ty == 3))):
             res.morph = it.morph
             has_verb = True
         # Track whether the most recent verb-like item was a form of "to be".
         if (ty == 1 or ty == 3):
             if (ty == 1 and tt.is_verb_be):
                 verb_be_before = True
             else:
                 verb_be_before = False
     if (not has_verb):
         return None
     # Drop trailing adverb items (the item at index 0 is never removed)
     # and pull end_token back to the last remaining item.
     for i in range(len(res.items) - 1, 0, -1):
         if (res.items[i].is_adverb):
             del res.items[i]
             res.end_token = res.items[i - 1].end_token
         else:
             break
     return res