# Try to parse a Russian noun phrase (adjectives/pronouns plus a head noun) starting at `first`.
#
# Parameters:
#   first        -- token to start from; None yields None.
#   typ          -- NounPhraseParseAttr bit flags. The body tests PARSEPREPOSITION,
#                   CANNOTHASCOMMAAND, REFERENTCANBENOUN, PARSEADVERBS, MULTILINES,
#                   PARSEVERBS, ADJECTIVECANBELAST, PARSEPRONOUNS and IGNOREADJBEST.
#   max_char_pos -- when > 0, the main scan stops at the first token whose begin_char exceeds it.
#   def_noun     -- optional NounPhraseItem that is appended to the collected items after the scan.
# Returns a NounPhraseToken, or None when no acceptable phrase is found.
#
# Outline of what the body does:
#   1. With PARSEPREPOSITION set and a leading "КАК", that word and an optional preposition
#      (PrepositionHelper.try_parse) are stepped over before scanning.
#   2. The main loop walks tokens and collects NounPhraseItem objects (NounPhraseItem.try_parse)
#      into `items`; it has dedicated branches for conjunctions, commas and '(' groups
#      (BracketHelper.try_parse), and gathers adverbs into `adverbs` when PARSEADVERBS is set.
#   3. Several special cases re-enter the parser (_NounPraseHelperInt.__try_parse_ru /
#      NounPhraseHelper.try_parse) on following tokens and may return that nested result early.
#   4. The head noun is searched for by iterating `items` from the last index down to 0;
#      if none is found the function returns None.
#   5. The result is created with NounPhraseToken._new466(first, noun.end_token, prep); the noun's
#      morphological variants are then filtered by check_accord against each adjective's variants.
#   6. Trailing checks (whitespace/case differences between adjacent adjectives, comma and
#      conjunction counts, a verb-like or adverb-like first token) can still reject with None.
#
# NOTE(review): the recursive calls use the spelling `_NounPraseHelperInt.__try_parse_ru`, so this
# presumably lives inside class `_NounPraseHelperInt` and relies on private name mangling -- confirm.
# NOTE(review): the statement nesting below cannot be read from this collapsed text; restore the
# original line breaks and indentation from the upstream source before changing any logic.
def __try_parse_ru(first: 'Token', typ: 'NounPhraseParseAttr', max_char_pos: int, def_noun: 'NounPhraseItem' = None) -> 'NounPhraseToken': if (first is None): return None items = None adverbs = None prep = None kak = False t0 = first if ((((typ) & (NounPhraseParseAttr.PARSEPREPOSITION))) != (NounPhraseParseAttr.NO) and t0.is_value("КАК", None)): t0 = t0.next0_ prep = PrepositionHelper.try_parse(t0) if (prep is not None): t0 = prep.end_token.next0_ kak = True internal_noun_prase = None conj_before = False t = t0 first_pass3041 = True while True: if first_pass3041: first_pass3041 = False else: t = t.next0_ if (not (t is not None)): break if (max_char_pos > 0 and t.begin_char > max_char_pos): break if ((t.morph.class0_.is_conjunction and not t.morph.class0_.is_adjective and not t.morph.class0_.is_pronoun) and not t.morph.class0_.is_noun): if (conj_before): break if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) != (NounPhraseParseAttr.NO)): break if (items is not None and ((t.is_and or t.is_or))): conj_before = True if ((t.next0_ is not None and t.next0_.is_char_of("\\/") and t.next0_.next0_ is not None) and t.next0_.next0_.is_or): t = t.next0_.next0_ if (((t.next0_ is not None and t.next0_.is_char('(') and t.next0_.next0_ is not None) and t.next0_.next0_.is_or and t.next0_.next0_.next0_ is not None) and t.next0_.next0_.next0_.is_char(')')): t = t.next0_.next0_.next0_ continue break elif (t.is_comma): if (conj_before or items is None): break if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) != (NounPhraseParseAttr.NO)): break mc = t.previous.get_morph_class_in_dictionary() if (mc.is_proper_surname or mc.is_proper_secname): break conj_before = True if (kak and t.next0_ is not None and t.next0_.is_value("ТАК", None)): t = t.next0_ if (t.next0_ is not None and t.next0_.is_and): t = t.next0_ pr = PrepositionHelper.try_parse(t.next0_) if (pr is not None): t = pr.end_token if (items[len(items) - 1].can_be_noun and items[len(items) - 
1].end_token.morph.class0_.is_pronoun): break continue elif (t.is_char('(')): if (items is None): return None brr = BracketHelper.try_parse(t, BracketParseAttr.NO, 100) if (brr is None): break if (brr.length_char > 100): break t = brr.end_token continue if (isinstance(t, ReferentToken)): if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == ( NounPhraseParseAttr.NO)): break elif (t.chars.is_latin_letter): break it = NounPhraseItem.try_parse(t, items, typ) if (it is None or ((not it.can_be_adj and not it.can_be_noun))): if (((it is not None and items is not None and t.chars.is_capital_upper) and (t.whitespaces_before_count < 3) and t.length_char > 3) and not t.get_morph_class_in_dictionary().is_noun and not t.get_morph_class_in_dictionary().is_adjective): it.can_be_noun = True items.append(it) break if ((((typ) & (NounPhraseParseAttr.PARSEADVERBS))) != (NounPhraseParseAttr.NO) and (isinstance(t, TextToken)) and t.morph.class0_.is_adverb): if (adverbs is None): adverbs = list() adverbs.append(Utils.asObjectOrNull(t, TextToken)) continue break it.conj_before = conj_before conj_before = False if (not it.can_be_adj and not it.can_be_noun): break if (t.is_newline_before and t != first): if ((((typ) & (NounPhraseParseAttr.MULTILINES))) != (NounPhraseParseAttr.NO)): pass elif (items is not None and t.chars != items[len(items) - 1].chars): if (t.chars.is_all_lower and items[len(items) - 1].chars.is_capital_upper): pass else: break if (items is None): items = list() else: it0 = items[len(items) - 1] if (it0.can_be_noun and it0.is_personal_pronoun): if (it.is_pronoun): break if ((it0.begin_token.previous is not None and it0.begin_token.previous. get_morph_class_in_dictionary().is_verb and not it0.begin_token.previous. get_morph_class_in_dictionary().is_adjective) and not it0.begin_token.previous. 
get_morph_class_in_dictionary().is_preposition): if (t.morph.case_.is_nominative or t.morph.case_.is_accusative): pass else: break if (it.can_be_noun and it.is_verb): if (it0.previous is None): pass elif ((isinstance(it0.previous, TextToken)) and not it0.previous.chars.is_letter): pass else: break items.append(it) t = it.end_token if (t.is_newline_after and not t.chars.is_all_lower): mc = t.get_morph_class_in_dictionary() if (mc.is_proper_surname): break if (t.morph.class0_.is_proper_surname and mc.is_undefined): break if (items is None): return None tt1 = None if (len(items) == 1 and items[0].can_be_adj): and0_ = False tt1 = items[0].end_token.next0_ first_pass3042 = True while True: if first_pass3042: first_pass3042 = False else: tt1 = tt1.next0_ if (not (tt1 is not None)): break if (tt1.is_and or tt1.is_or): and0_ = True break if (tt1.is_comma or tt1.is_value("НО", None) or tt1.is_value("ТАК", None)): continue break if (and0_): if (items[0].can_be_noun and items[0].is_personal_pronoun): and0_ = False if (and0_): tt2 = tt1.next0_ if (tt2 is not None and tt2.morph.class0_.is_preposition): tt2 = tt2.next0_ npt1 = _NounPraseHelperInt.__try_parse_ru( tt2, typ, max_char_pos, None) if (npt1 is not None and len(npt1.adjectives) > 0): ok1 = False for av in items[0].adj_morph: for v in npt1.noun.noun_morph: if (v.check_accord(av, False, False)): items[0].morph.add_item(av) ok1 = True if (ok1): npt1.begin_token = items[0].begin_token npt1.end_token = tt1.previous npt1.adjectives.clear() npt1.adjectives.append(items[0]) return npt1 if (def_noun is not None): items.append(def_noun) last1 = items[len(items) - 1] check = True for it in items: if (not it.can_be_adj): check = False break elif (it.can_be_noun and it.is_personal_pronoun): check = False break tt1 = last1.end_token.next0_ if ((tt1 is not None and check and ((tt1.morph.class0_.is_preposition or tt1.morph.case_.is_instrumental))) and (tt1.whitespaces_before_count < 2)): inp = NounPhraseHelper.try_parse( tt1, 
Utils.valToEnum((typ) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), max_char_pos, None) if (inp is not None): tt1 = inp.end_token.next0_ npt1 = _NounPraseHelperInt.__try_parse_ru( tt1, typ, max_char_pos, None) if (npt1 is not None): ok = True ii = 0 first_pass3043 = True while True: if first_pass3043: first_pass3043 = False else: ii += 1 if (not (ii < len(items))): break it = items[ii] if (NounPhraseItem.try_accord_adj_and_noun( it, Utils.asObjectOrNull(npt1.noun, NounPhraseItem))): continue if (ii > 0): inp2 = NounPhraseHelper.try_parse( it.begin_token, typ, max_char_pos, None) if (inp2 is not None and inp2.end_token == inp.end_token): del items[ii:ii + len(items) - ii] inp = inp2 break ok = False break if (ok): if (npt1.morph.case_.is_genitive and not inp.morph.case_.is_instrumental): ok = False if (ok): i = 0 while i < len(items): npt1.adjectives.insert(i, items[i]) i += 1 npt1.internal_noun = inp mmm = MorphCollection(npt1.morph) for it in items: mmm.remove_items(it.adj_morph[0], False) if (mmm.gender != MorphGender.UNDEFINED or mmm.number != MorphNumber.UNDEFINED or not mmm.case_.is_undefined): npt1.morph = mmm if (adverbs is not None): if (npt1.adverbs is None): npt1.adverbs = adverbs else: npt1.adverbs[0:0] = adverbs npt1.begin_token = first return npt1 if (tt1 is not None and tt1.morph.class0_.is_noun and not tt1.morph.case_.is_genitive): it = NounPhraseItem.try_parse(tt1, items, typ) if (it is not None and it.can_be_noun): internal_noun_prase = inp inp.begin_token = items[0].end_token.next0_ items.append(it) i = 0 first_pass3044 = True while True: if first_pass3044: first_pass3044 = False else: i += 1 if (not (i < len(items))): break if (items[i].can_be_adj and items[i].begin_token.morph.class0_.is_verb): it = items[i].begin_token if (not it.get_morph_class_in_dictionary().is_verb): continue if (it.is_value("УПОЛНОМОЧЕННЫЙ", None)): continue if ((((typ) & (NounPhraseParseAttr.PARSEVERBS))) == ( NounPhraseParseAttr.NO)): continue inp = 
_NounPraseHelperInt.__try_parse_ru( items[i].end_token.next0_, NounPhraseParseAttr.NO, max_char_pos, None) if (inp is None): continue if (inp.anafor is not None and i == (len(items) - 1) and NounPhraseItem.try_accord_adj_and_noun( items[i], Utils.asObjectOrNull(inp.noun, NounPhraseItem))): inp.begin_token = first ii = 0 while ii < len(items): inp.adjectives.insert(ii, items[ii]) ii += 1 return inp if (inp.end_token.whitespaces_after_count > 3): continue npt1 = _NounPraseHelperInt.__try_parse_ru( inp.end_token.next0_, NounPhraseParseAttr.NO, max_char_pos, None) if (npt1 is None): continue ok = True j = 0 while j <= i: if (not NounPhraseItem.try_accord_adj_and_noun( items[j], Utils.asObjectOrNull(npt1.noun, NounPhraseItem))): ok = False break j += 1 if (not ok): continue verb = VerbPhraseHelper.try_parse(it, True, False, False) if (verb is None): continue vlinks = SemanticHelper.try_create_links(verb, inp, None) nlinks = SemanticHelper.try_create_links(inp, npt1, None) if (len(vlinks) == 0 and len(nlinks) > 0): continue j = 0 while j <= i: npt1.adjectives.insert(j, items[j]) j += 1 items[i].end_token = inp.end_token mmm = MorphCollection(npt1.morph) bil = list() j = 0 while j <= i: bil.clear() for m in items[j].adj_morph: bil.append(m) mmm.remove_items_list_cla(bil, None) j += 1 if (mmm.gender != MorphGender.UNDEFINED or mmm.number != MorphNumber.UNDEFINED or not mmm.case_.is_undefined): npt1.morph = mmm if (adverbs is not None): if (npt1.adverbs is None): npt1.adverbs = adverbs else: npt1.adverbs[0:0] = adverbs npt1.begin_token = first return npt1 ok2 = False if ((len(items) == 1 and (((typ) & (NounPhraseParseAttr.ADJECTIVECANBELAST))) != (NounPhraseParseAttr.NO) and (items[0].whitespaces_after_count < 3)) and not items[0].is_adverb): if (not items[0].can_be_adj): ok2 = True elif (items[0].is_personal_pronoun and items[0].can_be_noun): ok2 = True if (ok2): it = NounPhraseItem.try_parse(items[0].end_token.next0_, None, typ) if (it is not None and it.can_be_adj and 
it.begin_token.chars.is_all_lower): ok2 = True if (it.is_adverb or it.is_verb): ok2 = False if (it.is_pronoun and items[0].is_pronoun): ok2 = False if (it.can_be_adj_for_personal_pronoun and items[0].is_personal_pronoun): ok2 = True if (ok2 and NounPhraseItem.try_accord_adj_and_noun( it, items[0])): npt1 = _NounPraseHelperInt.__try_parse_ru( it.begin_token, typ, max_char_pos, None) if (npt1 is not None and ((npt1.end_char > it.end_char or len(npt1.adjectives) > 0))): pass else: items.insert(0, it) noun = None adj_after = None for i in range(len(items) - 1, -1, -1): if (items[i].can_be_noun): if (items[i].conj_before): continue if (i > 0 and not items[i - 1].can_be_adj): continue if (i > 0 and items[i - 1].can_be_noun): if (items[i - 1].is_doubt_adjective): continue if (items[i - 1].is_pronoun and items[i].is_pronoun): if (items[i].is_pronoun and items[i - 1].can_be_adj_for_personal_pronoun): pass else: continue noun = items[i] del items[i:i + len(items) - i] if (adj_after is not None): items.append(adj_after) elif (len(items) > 0 and items[0].can_be_noun and not items[0].can_be_adj): noun = items[0] items.clear() break if (noun is None): return None res = NounPhraseToken._new466(first, noun.end_token, prep) if (adverbs is not None): for a in adverbs: if (a.begin_char < noun.begin_char): if (len(items) == 0 and prep is None): return None if (res.adverbs is None): res.adverbs = list() res.adverbs.append(a) res.noun = (noun) res.multi_nouns = noun.multi_nouns if (kak): res.multi_nouns = True res.internal_noun = internal_noun_prase for v in noun.noun_morph: noun.morph.add_item(v) res.morph = noun.morph if (res.morph.case_.is_nominative and first.previous is not None and first.previous.morph.class0_.is_preposition): res.morph.case_ = (res.morph.case_) ^ MorphCase.NOMINATIVE if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO) and ((res.morph.class0_.is_pronoun or res.morph.class0_.is_personal_pronoun))): return None stat = None if (len(items) 
> 1): stat = dict() need_update_morph = False if (len(items) > 0): ok_list = list() is_num_not = False for vv in noun.noun_morph: i = 0 v = vv i = 0 while i < len(items): ok = False for av in items[i].adj_morph: if (v.check_accord(av, False, False)): ok = True if (not ((av.case_) & v.case_).is_undefined and av.case_ != v.case_): v.case_ = av.case_ = (av.case_) & v.case_ break if (not ok): if (items[i].can_be_numeric_adj and items[i].try_accord_var(v, False)): ok = True v1 = NounPhraseItemTextVar() v1.copy_from_item(v) v1.number = MorphNumber.PLURAL is_num_not = True v1.case_ = MorphCase() for a in items[i].adj_morph: v1.case_ = (v1.case_) | a.case_ v = v1 else: break i += 1 if (i >= len(items)): ok_list.append(v) if (len(ok_list) > 0 and (((len(ok_list) < res.morph.items_count) or is_num_not))): res.morph = MorphCollection() for v in ok_list: res.morph.add_item(v) if (not is_num_not): noun.morph = res.morph i = 0 first_pass3045 = True while True: if first_pass3045: first_pass3045 = False else: i += 1 if (not (i < len(items))): break for av in items[i].adj_morph: for v in noun.noun_morph: if (v.check_accord(av, False, False)): if (not ((av.case_) & v.case_).is_undefined and av.case_ != v.case_): v.case_ = av.case_ = (av.case_) & v.case_ need_update_morph = True items[i].morph.add_item(av) if (stat is not None and av.normal_value is not None and len(av.normal_value) > 1): last = av.normal_value[len(av.normal_value) - 1] if (not last in stat): stat[last] = 1 else: stat[last] += 1 if (items[i].is_pronoun or items[i].is_personal_pronoun): res.anafor = items[i].begin_token if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == ( NounPhraseParseAttr.NO)): continue tt = Utils.asObjectOrNull(items[i].begin_token, TextToken) if (tt is not None and not tt.term.startswith("ВЫСШ")): err = False for wf in tt.morph.items: if (wf.class0_.is_adjective): if (wf.contains_attr("прев.", None)): if ((((typ) & (NounPhraseParseAttr.IGNOREADJBEST))) != (NounPhraseParseAttr.NO)): err = True 
if (wf.contains_attr("к.ф.", None) and tt.morph.class0_.is_personal_pronoun): return None if (err): continue if (res.morph.case_.is_nominative): v = MiscHelper.get_text_value_of_meta_token( items[i], GetTextAttr.KEEPQUOTES) if (not Utils.isNullOrEmpty(v)): if (items[i].get_normal_case_text( None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False) != v): wf = NounPhraseItemTextVar(items[i].morph, None) wf.normal_value = v wf.class0_ = MorphClass.ADJECTIVE wf.case_ = res.morph.case_ if (res.morph.case_.is_prepositional or res.morph.gender == MorphGender.NEUTER or res.morph.gender == MorphGender.FEMINIE): items[i].morph.add_item(wf) else: items[i].morph.insert_item(0, wf) res.adjectives.append(items[i]) if (items[i].end_char > res.end_char): res.end_token = items[i].end_token i = 0 first_pass3046 = True while True: if first_pass3046: first_pass3046 = False else: i += 1 if (not (i < (len(res.adjectives) - 1))): break if (res.adjectives[i].whitespaces_after_count > 5): if (res.adjectives[i].chars != res.adjectives[i + 1].chars): if (not res.adjectives[i + 1].chars.is_all_lower): return None if (res.adjectives[i].chars.is_all_upper and res.adjectives[i + 1].chars.is_capital_upper): return None if (res.adjectives[i].chars.is_capital_upper and res.adjectives[i + 1].chars.is_all_upper): return None if (res.adjectives[i].whitespaces_after_count > 10): if (res.adjectives[i].newlines_after_count == 1): if (res.adjectives[i].chars.is_capital_upper and i == 0 and res.adjectives[i + 1].chars.is_all_lower): continue if (res.adjectives[i].chars == res.adjectives[ i + 1].chars): continue return None if (need_update_morph): noun.morph = MorphCollection() for v in noun.noun_morph: noun.morph.add_item(v) res.morph = noun.morph if (len(res.adjectives) > 0): if (noun.begin_token.previous is not None): if (noun.begin_token.previous.is_comma_and): if (res.adjectives[0].begin_char > noun.begin_char): pass else: return None zap = 0 and0_ = 0 cou = 0 last_and = False i = 0 while i < 
(len(res.adjectives) - 1): te = res.adjectives[i].end_token.next0_ if (te is None): return None if (te.is_char('(')): pass elif (te.is_comma): zap += 1 last_and = False elif (te.is_and or te.is_or): and0_ += 1 last_and = True if (not res.adjectives[i].begin_token.morph.class0_.is_pronoun ): cou += 1 i += 1 if ((zap + and0_) > 0): if (and0_ > 1): return None elif (and0_ == 1 and not last_and): return None if ((zap + and0_) != cou): if (and0_ == 1): pass else: return None last = Utils.asObjectOrNull( res.adjectives[len(res.adjectives) - 1], NounPhraseItem) if (last.is_pronoun and not last_and): return None if (stat is not None): for adj in items: if (adj.morph.items_count > 1): w1 = Utils.asObjectOrNull(adj.morph.get_indexer_item(0), NounPhraseItemTextVar) w2 = Utils.asObjectOrNull(adj.morph.get_indexer_item(1), NounPhraseItemTextVar) if ((len(w1.normal_value) < 2) or (len(w2.normal_value) < 2)): break l1 = w1.normal_value[len(w1.normal_value) - 1] l2 = w2.normal_value[len(w2.normal_value) - 1] i1 = 0 i2 = 0 wrapi1468 = RefOutArgWrapper(0) Utils.tryGetValue(stat, l1, wrapi1468) i1 = wrapi1468.value wrapi2467 = RefOutArgWrapper(0) Utils.tryGetValue(stat, l2, wrapi2467) i2 = wrapi2467.value if (i1 < i2): adj.morph.remove_item(1) adj.morph.insert_item(0, w2) if (res.begin_token.get_morph_class_in_dictionary().is_verb and len(items) > 0): if (not res.begin_token.chars.is_all_lower or res.begin_token.previous is None): pass elif (res.begin_token.previous.morph.class0_.is_preposition): pass else: comma = False tt = res.begin_token.previous first_pass3047 = True while True: if first_pass3047: first_pass3047 = False else: tt = tt.previous if (not (tt is not None and tt.end_char <= res.end_char)): break if (tt.morph.class0_.is_adverb): continue if (tt.is_char_of(".;")): break if (tt.is_comma): comma = True continue if (tt.is_value("НЕ", None)): continue if (((tt.morph.class0_.is_noun or tt.morph.class0_.is_proper)) and comma): for it in res.begin_token.morph.items: if 
(it.class0_.is_verb and (isinstance(it, MorphWordForm))): if (tt.morph.check_accord(it, False, False)): if (res.morph.case_.is_instrumental): return None break if (res.begin_token == res.end_token): mc = res.begin_token.get_morph_class_in_dictionary() if (mc.is_adverb): if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition): pass elif (mc.is_noun and not mc.is_preposition and not mc.is_conjunction): pass elif (res.begin_token.is_value("ВЕСЬ", None)): pass else: return None if (def_noun is not None and def_noun.end_token == res.end_token and len(res.adjectives) > 0): res.end_token = res.adjectives[len(res.adjectives) - 1].end_token return res
# Try to parse a named measurement: a name (normally a noun phrase) followed by a numeric value
# with units, starting at token `t`.
#
# Parameters:
#   t                -- start token; anything that is not a TextToken, or a table control
#                       character, yields None.
#   add_units        -- TerminCollection forwarded unchanged to every unit/number parser called here.
#   can_be_set       -- when False the function returns right after the single value is built and
#                       never tries to attach a second value with different units (res.is_set).
#   can_units_absent -- when True a value for which no units were found is still accepted;
#                       otherwise that case returns None.
# Returns a MeasureToken, or None when the text does not match.
#
# Outline of what the body does:
#   1. An optional min/max marker is read with NumbersWithUnitToken._isMinOrMax into `minmax`.
#   2. The name is taken from NounPhraseHelper.tryParse; fallbacks are _tryParseWHL, the literal
#      "КПД", or a word longer than 3 chars that is undefined in the dictionary. A start that
#      parses as a real number or a date item is rejected.
#   3. A loop walks the following tokens (stopping at a new line or a table control character),
#      extending the name and collecting units, bracketed units and nested measures ("ПРИ ...").
#   4. The values come from NumbersWithUnitToken.tryParseMulti; several values without nested
#      measures produce a composite result whose `internals` hold one MeasureToken per value.
#   5. For a single value, `minmax` < 0 moves single_val to from_val and `minmax` > 0 moves it to
#      to_val; missing units are looked up after the value with UnitToken.tryParseList.
#
# NOTE(review): the branch `if (tt.isValue("ЯМЗ", None)): pass` has no effect -- presumably a
# leftover debugging hook; confirm before removing.
# NOTE(review): `nams` comes from Utils.asObjectOrNull(whd.tag, list) and is passed to len()
# without a None check -- verify that whd.tag is always a list at that point.
# NOTE(review): the statement nesting below cannot be read from this collapsed text; restore the
# original line breaks and indentation from the upstream source before changing any logic.
def tryParse(t: 'Token', add_units: 'TerminCollection', can_be_set: bool = True, can_units_absent: bool = False) -> 'MeasureToken': """ Extraction together with the name Args: t(Token): """ if (not ((isinstance(t, TextToken)))): return None if (t.is_table_control_char): return None t0 = t whd = None minmax = 0 wrapminmax1516 = RefOutArgWrapper(minmax) tt = NumbersWithUnitToken._isMinOrMax(t0, wrapminmax1516) minmax = wrapminmax1516.value if (tt is not None): t = tt.next0_ npt = NounPhraseHelper.tryParse( t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0) if (npt is None): whd = NumbersWithUnitToken._tryParseWHL(t) if (whd is not None): npt = NounPhraseToken(t0, whd.end_token) elif (t0.isValue("КПД", None)): npt = NounPhraseToken(t0, t0) elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.getMorphClassInDictionary().is_undefined): npt = NounPhraseToken(t0, t0) else: return None elif (NumberHelper.tryParseRealNumber(t, True) is not None): return None else: dtok = DateItemToken.tryAttach(t, None) if (dtok is not None): return None t1 = npt.end_token t = npt.end_token name_ = MetaToken._new561(npt.begin_token, npt.end_token, npt.morph) units = None units2 = None internals_ = list() not0_ = False tt = t1.next0_ first_pass3037 = True while True: if first_pass3037: first_pass3037 = False else: tt = tt.next0_ if (not (tt is not None)): break if (tt.is_newline_before): break if (tt.is_table_control_char): break wrapminmax1510 = RefOutArgWrapper(minmax) tt2 = NumbersWithUnitToken._isMinOrMax(tt, wrapminmax1510) minmax = wrapminmax1510.value if (tt2 is not None): tt = tt2 t = tt t1 = t continue if ((tt.isValue("БЫТЬ", None) or tt.isValue("ДОЛЖЕН", None) or tt.isValue("ДОЛЖНЫЙ", None)) or tt.isValue("МОЖЕТ", None) or ((tt.isValue("СОСТАВЛЯТЬ", None) and not tt.getMorphClassInDictionary().is_adjective))): t = tt t1 = t if (tt.previous.isValue("НЕ", None)): not0_ = True continue www = 
NumbersWithUnitToken._tryParseWHL(tt) if (www is not None): whd = www tt = www.end_token t = tt t1 = t continue if (len(internals_) > 0 and tt.is_comma_and): continue if (tt.isValue("ПРИ", None) or len(internals_) > 0): mt1 = MeasureToken.tryParse(tt.next0_, add_units, False, False) if (mt1 is not None and mt1.reliable): internals_.append(mt1) tt = mt1.end_token t = tt t1 = t continue if ((isinstance(tt, NumberToken)) and (tt).typ == NumberSpellingType.WORDS): npt3 = NounPhraseHelper.tryParse( tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0) if (npt3 is not None): tt = npt3.end_token t1 = tt if (len(internals_) == 0): name_.end_token = t1 continue mt0 = NumbersWithUnitToken.tryParse(tt, add_units, False, False) if (mt0 is not None): break if (((tt.is_comma or tt.isChar('('))) and tt.next0_ is not None): www = NumbersWithUnitToken._tryParseWHL(tt.next0_) if (www is not None): whd = www tt = www.end_token t = tt t1 = t if (tt.next0_ is not None and tt.next0_.is_comma): tt = tt.next0_ t1 = tt if (tt.next0_ is not None and tt.next0_.isChar(')')): tt = tt.next0_ t1 = tt continue uu = UnitToken.tryParseList(tt.next0_, add_units, False) if (uu is not None): t = uu[len(uu) - 1].end_token t1 = t units = uu if (tt.isChar('(') and t1.next0_ is not None and t1.next0_.isChar(')')): tt = t1.next0_ t = tt t1 = t continue elif (t1.next0_ is not None and t1.next0_.isChar('(')): uu = UnitToken.tryParseList(t1.next0_.next0_, add_units, False) if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.isChar(')')): units2 = uu tt = uu[len(uu) - 1].end_token.next0_ t = tt t1 = t continue if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): break if (BracketHelper.canBeStartOfSequence(tt, False, False)): br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100) if (br is not None): tt = br.end_token t = tt t1 = t continue if (tt.isValue("НЕ", None) and tt.next0_ is not None): mc = tt.next0_.getMorphClassInDictionary() if 
(mc.is_adverb or mc.is_misc): break continue if (tt.isValue("ЯМЗ", None)): pass npt2 = NounPhraseHelper.tryParse( tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0) if (npt2 is None): if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): to = NumbersWithUnitToken.M_TERMINS.tryParse( tt, TerminParseAttr.NO) if (to is not None): if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): pass else: break t1 = tt continue mc = tt.getMorphClassInDictionary() if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): uu = UnitToken.tryParseList(tt, add_units, False) if (uu is not None): if (uu[0].length_char > 2 or len(uu) > 1): units = uu t = uu[len(uu) - 1].end_token t1 = t break t = tt t1 = t if (len(internals_) == 0): name_.end_token = tt continue if (tt.is_comma): continue if (tt.isChar('.')): if (not MiscHelper.canBeStartOfSentence(tt.next0_)): continue uu = UnitToken.tryParseList(tt.next0_, add_units, False) if (uu is not None): if (uu[0].length_char > 2 or len(uu) > 1): units = uu t = uu[len(uu) - 1].end_token t1 = t break break tt = npt2.end_token t = tt t1 = t if (len(internals_) > 0): pass elif (t.isValue("ПРЕДЕЛ", None) or t.isValue("ГРАНИЦА", None) or t.isValue("ДИАПАЗОН", None)): pass elif (t.chars.is_letter): name_.end_token = t1 t1 = t1.next0_ first_pass3038 = True while True: if first_pass3038: first_pass3038 = False else: t1 = t1.next0_ if (not (t1 is not None)): break if (t1.is_table_control_char): pass elif (t1.isCharOf(":,_")): www = NumbersWithUnitToken._tryParseWHL(t1.next0_) if (www is not None): whd = www t = www.end_token t1 = t continue elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): pass else: break if (t1 is None): return None mts = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, 
not0_) if (mts is None): return None mt = mts[0] if (name_.begin_token.morph.class0_.is_preposition): name_.begin_token = name_.begin_token.next0_ if (len(mts) > 1 and len(internals_) == 0): if (len(mt.units) == 0): if (units is not None): for m in mts: m.units = units res1 = MeasureToken._new1511(t0, mts[len(mts) - 1].end_token, name_.morph, True) res1.name = MiscHelper.getTextValueOfMetaToken( name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE) k = 0 while k < len(mts): ttt = MeasureToken._new1506(mts[k].begin_token, mts[k].end_token, mts[k]) if (whd is not None): nams = Utils.asObjectOrNull(whd.tag, list) if (k < len(nams)): ttt.name = nams[k] res1.internals.append(ttt) k += 1 tt1 = res1.end_token.next0_ if (tt1 is not None and tt1.isChar('±')): nn = NumbersWithUnitToken._tryParse(tt1, add_units, True, False) if (nn is not None and nn.plus_minus_percent): res1.end_token = nn.end_token res1.nums = nn return res1 if (not mt.is_whitespace_before): if (mt.begin_token.previous is None): return None if (mt.begin_token.previous.isCharOf(":),") or mt.begin_token.previous.is_table_control_char): pass else: return None if (len(mt.units) == 0 and units is not None): mt.units = units if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): i = 1 while i < len(units): if (units[i].pow0_ == -1): j = i while j < len(units): mt.div_num.units.append(units[j]) units[j].pow0_ = (-units[j].pow0_) j += 1 del mt.units[i:i + len(units) - i] break i += 1 if ((minmax < 0) and mt.single_val is not None): mt.from_val = mt.single_val mt.from_include = True mt.single_val = (None) if (minmax > 0 and mt.single_val is not None): mt.to_val = mt.single_val mt.to_include = True mt.single_val = (None) if (len(mt.units) == 0): units = UnitToken.tryParseList(mt.end_token.next0_, add_units, True) if (units is None): if (can_units_absent): pass else: return None else: mt.units = units res = MeasureToken._new1513(t0, mt.end_token, name_.morph, internals_) if (((not 
t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))): name_.begin_token = res.begin_token = name_.begin_token.previous.previous res.name = MiscHelper.getTextValueOfMetaToken( name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE) res.nums = mt for u in res.nums.units: if (u.keyword is not None): if (u.keyword.begin_char >= res.begin_char): res.reliable = True res.__parseInternals(add_units) if (len(res.internals) > 0 or not can_be_set): return res t1 = res.end_token.next0_ if (t1 is not None and t1.is_comma_and): t1 = t1.next0_ mts1 = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, False) if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.canBeEquals(mts[0].units, mts1[0].units)): res.is_set = True res.nums = (None) res.internals.append( MeasureToken._new1506(mt.begin_token, mt.end_token, mt)) res.internals.append( MeasureToken._new1506(mts1[0].begin_token, mts1[0].end_token, mts1[0])) res.end_token = mts1[0].end_token return res
# Try to parse an English noun phrase (optional article, modifiers, head noun) starting at `first`.
#
# Parameters:
#   first        -- token to start from; None yields None.
#   typ          -- NounPhraseParseAttr bit flags; the body tests MULTILINES, REFERENTCANBENOUN
#                   and PARSEPRONOUNS.
#   max_char_pos -- when > 0, scanning stops at the first token whose begin_char exceeds it.
# Returns a NounPhraseToken, or None when no item was collected.
#
# Outline of what the body does:
#   1. `has_prop` is set when the token before `first` is a preposition and fewer than 3
#      whitespaces separate them.
#   2. The loop walks consecutive tokens whose chars are Latin letters. An English article at
#      the very first token sets `has_article` and is skipped; "SO-CALL..." (SO, hyphen, CALL)
#      is skipped as well. A conjunction or preposition (by dictionary class) ends the scan.
#   3. Each accepted token becomes a one-token NounPhraseItem(t, t) with can_be_noun /
#      can_be_adj derived from its dictionary morph class (a term ending in "EAN" is treated
#      as an adjective).
#   4. The last collected item is the noun; all earlier items are appended to res.adjectives.
#   5. res.morph is rebuilt from copies of the noun's end-token morph variants, skipping verb
#      variants and proper variants of an all-lowercase noun; with an article the number of
#      each copy is forced to SINGULAR, and a default NOUN/SINGULAR entry is added if no
#      variant remains.
#
# NOTE(review): after the first item, `mc.is_noun` / `mc.is_adjective` are assigned when an
# adjective suffix follows, but the item's can_be_* flags were already set from the old values --
# confirm this is intended.
# NOTE(review): the statement nesting below cannot be read from this collapsed text; restore the
# original line breaks and indentation from the upstream source before changing any logic.
def __try_parse_en(first: 'Token', typ: 'NounPhraseParseAttr', max_char_pos: int) -> 'NounPhraseToken': if (first is None): return None items = None has_article = False has_prop = False has_misc = False if (first.previous is not None and first.previous.morph.class0_.is_preposition and (first.whitespaces_before_count < 3)): has_prop = True t = first first_pass3048 = True while True: if first_pass3048: first_pass3048 = False else: t = t.next0_ if (not (t is not None)): break if (max_char_pos > 0 and t.begin_char > max_char_pos): break if (not t.chars.is_latin_letter): break if (t != first and t.whitespaces_before_count > 2): if ((((typ) & (NounPhraseParseAttr.MULTILINES))) != (NounPhraseParseAttr.NO)): pass elif (MiscHelper.is_eng_article(t.previous)): pass else: break tt = Utils.asObjectOrNull(t, TextToken) if (t == first and tt is not None): if (MiscHelper.is_eng_article(tt)): has_article = True continue if (isinstance(t, ReferentToken)): if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == ( NounPhraseParseAttr.NO)): break elif (tt is None): break if ((t.is_value("SO", None) and t.next0_ is not None and t.next0_.is_hiphen) and t.next0_.next0_ is not None): if (t.next0_.next0_.is_value("CALL", None)): t = t.next0_.next0_ continue mc = t.get_morph_class_in_dictionary() if (mc.is_conjunction or mc.is_preposition): break if (mc.is_pronoun or mc.is_personal_pronoun): if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == ( NounPhraseParseAttr.NO)): break elif (mc.is_misc): if (t.is_value("THIS", None) or t.is_value("THAT", None)): has_misc = True if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == ( NounPhraseParseAttr.NO)): break is_adj = False if (((has_article or has_prop or has_misc)) and items is None): pass elif (isinstance(t, ReferentToken)): pass else: if (not mc.is_noun and not mc.is_adjective): if (mc.is_undefined and has_article): pass elif (items is None and mc.is_undefined and t.chars.is_capital_upper): pass elif (mc.is_pronoun): pass elif 
(tt.term.endswith("EAN")): is_adj = True elif (MiscHelper.is_eng_adj_suffix(tt.next0_)): pass else: break if (mc.is_verb): if (t.next0_ is not None and t.next0_.morph.class0_.is_verb and (t.whitespaces_after_count < 2)): pass elif (t.chars.is_capital_upper and not MiscHelper.can_be_start_of_sentence(t)): pass elif ((t.chars.is_capital_upper and mc.is_noun and (isinstance(t.next0_, TextToken))) and t.next0_.chars.is_capital_upper): pass elif (isinstance(t, ReferentToken)): pass else: break if (items is None): items = list() it = NounPhraseItem(t, t) if (mc.is_noun): it.can_be_noun = True if (mc.is_adjective or mc.is_pronoun or is_adj): it.can_be_adj = True items.append(it) t = it.end_token if (len(items) == 1): if (MiscHelper.is_eng_adj_suffix(t.next0_)): mc.is_noun = False mc.is_adjective = True t = t.next0_.next0_ if (items is None): return None noun = items[len(items) - 1] res = NounPhraseToken(first, noun.end_token) res.noun = (noun) res.morph = MorphCollection() for v in noun.end_token.morph.items: if (v.class0_.is_verb): continue if (v.class0_.is_proper and noun.begin_token.chars.is_all_lower): continue if (isinstance(v, MorphWordForm)): wf = MorphWordForm() wf.copy_from_word_form(Utils.asObjectOrNull(v, MorphWordForm)) if (has_article and v.number != MorphNumber.SINGULAR): wf.number = MorphNumber.SINGULAR res.morph.add_item(wf) else: bi = MorphBaseInfo() bi.copy_from(v) if (has_article and v.number != MorphNumber.SINGULAR): bi.number = MorphNumber.SINGULAR res.morph.add_item(bi) if (res.morph.items_count == 0 and has_article): res.morph.add_item( MorphBaseInfo._new192(MorphClass.NOUN, MorphNumber.SINGULAR)) i = 0 while i < (len(items) - 1): res.adjectives.append(items[i]) i += 1 return res
def try_parse(t : 'Token', add_units : 'TerminCollection', can_be_set : bool=True, can_units_absent : bool=False, is_resctriction : bool=False, is_subval : bool=False) -> 'MeasureToken':
    """Try to parse a named measure ("<name> ... <number(s)> <units>") starting at ``t``.

    The function first determines the name part (a noun phrase or one of several
    fallbacks), then walks forward over connecting words, nested "ПРИ ..."
    conditions, dimension headers and unit lists, and finally parses the numeric
    value(s) with NumbersWithUnitToken.try_parse_multi.

    Args:
        t: start token; anything that is not a TextToken, or a table control
            character, yields None.
        add_units: passed through unchanged to the unit and number parsers.
        can_be_set: when False the result is returned without trying to attach a
            second value with different units (the ``is_set`` case at the end).
        can_units_absent: allows a result whose number carries no units.
        is_resctriction: forwarded to try_parse_multi; when set, a ':' / ',' / '_'
            separator before the value makes the function return None.
        is_subval: enables the "any run of letter tokens" name fallback and keeps
            the name text as is (GetTextAttr.NO) instead of nominative form.

    Returns:
        A MeasureToken, or None when no measure is recognised.
    """
    if (not (isinstance(t, TextToken))): 
        return None
    if (t.is_table_control_char): 
        return None
    t0 = t
    whd = None
    # minmax is an out-parameter of _is_min_or_max: <0 later turns a single value
    # into a lower bound, >0 into an upper bound (see the end of the function).
    minmax = 0
    wrapminmax1625 = RefOutArgWrapper(minmax)
    tt = NumbersWithUnitToken._is_min_or_max(t0, wrapminmax1625)
    minmax = wrapminmax1625.value
    if (tt is not None): 
        t = tt.next0_
    # --- 1. name part -----------------------------------------------------
    npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0, None)
    if (npt is None): 
        # No noun phrase: try the fallbacks in order.
        whd = NumbersWithUnitToken._try_parsewhl(t)
        if (whd is not None): 
            npt = NounPhraseToken(t0, whd.end_token)
        elif (t0.is_value("КПД", None)): 
            npt = NounPhraseToken(t0, t0)
        elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.get_morph_class_in_dictionary().is_undefined): 
            npt = NounPhraseToken(t0, t0)
        elif (t0.is_value("T", None) and t0.chars.is_all_lower): 
            npt = NounPhraseToken(t0, t0)
            t = t0
            if (t.next0_ is not None and t.next0_.is_char('=')): 
                npt.end_token = t.next0_
        elif ((isinstance(t0, TextToken)) and t0.chars.is_letter and is_subval): 
            if (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                return None
            npt = NounPhraseToken(t0, t0)
            # Extend the name over following letter tokens / bracket groups until
            # a wide gap, a non-text token or a number-with-units is met.
            t = t0.next0_
            while t is not None: 
                if (t.whitespaces_before_count > 2): 
                    break
                elif (not (isinstance(t, TextToken))): 
                    break
                elif (not t.chars.is_letter): 
                    br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                    if (br is not None): 
                        t = br.end_token
                        npt.end_token = t
                    else: 
                        break
                elif (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                    break
                else: 
                    npt.end_token = t
                t = t.next0_
        else: 
            return None
    elif (NumberHelper.try_parse_real_number(t, True, False) is not None): 
        return None
    else: 
        dtok = DateItemToken.try_attach(t, None, False)
        if (dtok is not None): 
            return None
    t1 = npt.end_token
    t = npt.end_token
    name_ = MetaToken._new509(npt.begin_token, npt.end_token, npt.morph)
    units = None
    units2 = None
    internals_ = list()
    not0_ = False
    # --- 2. walk from the name towards the value ---------------------------
    # The "first_pass" flag emulates a C-style for loop: the step "tt = tt.next0_"
    # runs on every pass except the first one, including after "continue".
    tt = t1.next0_
    first_pass3305 = True
    while True:
        if first_pass3305: first_pass3305 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.is_newline_before): 
            break
        if (tt.is_table_control_char): 
            break
        wrapminmax1617 = RefOutArgWrapper(minmax)
        tt2 = NumbersWithUnitToken._is_min_or_max(tt, wrapminmax1617)
        minmax = wrapminmax1617.value
        if (tt2 is not None): 
            tt = tt2
            t = tt
            t1 = t
            continue
        # Linking verbs are skipped; a preceding "НЕ" sets the negation flag
        # that is later passed to try_parse_multi.
        if ((tt.is_value("БЫТЬ", None) or tt.is_value("ДОЛЖЕН", None) or tt.is_value("ДОЛЖНЫЙ", None)) or tt.is_value("МОЖЕТ", None) or ((tt.is_value("СОСТАВЛЯТЬ", None) and not tt.get_morph_class_in_dictionary().is_adjective))): 
            t = tt
            t1 = t
            if (tt.previous.is_value("НЕ", None)): 
                not0_ = True
            continue
        www = NumbersWithUnitToken._try_parsewhl(tt)
        if (www is not None): 
            whd = www
            tt = www.end_token
            t = tt
            t1 = t
            continue
        if (tt.is_value("ПРИ", None)): 
            # "ПРИ <measure>" / "ПРИ <number with units>" becomes an internal item.
            mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
            if (mt1 is not None): 
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
            n1 = NumbersWithUnitToken.try_parse(tt.next0_, add_units, False, False, False, False)
            if (n1 is not None and len(n1.units) > 0): 
                mt1 = MeasureToken._new1612(n1.begin_token, n1.end_token, n1)
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        if (tt.is_value("ПО", None) and tt.next0_ is not None and tt.next0_.is_value("U", None)): 
            tt = tt.next0_
            t = tt
            t1 = t
            continue
        if (len(internals_) > 0): 
            if (tt.is_char(':')): 
                break
            mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
            if (mt1 is not None and mt1.reliable): 
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        # The following branches absorb numbers that belong to the name itself
        # (spelled-out numerals, "ABC-12", "ABC12", "12-xxx", "USB 3..."); the
        # name is only extended while no internal item has been collected.
        if ((isinstance(tt, NumberToken)) and tt.typ == NumberSpellingType.WORDS): 
            npt3 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0, None)
            if (npt3 is not None): 
                tt = npt3.end_token
                t1 = tt
                if (len(internals_) == 0): 
                    name_.end_token = t1
                continue
        if (((tt.is_hiphen and not tt.is_whitespace_before and not tt.is_whitespace_after) and (isinstance(tt.next0_, NumberToken)) and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
            t = tt.next0_
            tt = t
            t1 = tt
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        if (((isinstance(tt, NumberToken)) and not tt.is_whitespace_before and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
            t = tt
            t1 = t
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        if ((((isinstance(tt, NumberToken)) and not tt.is_whitespace_after and tt.next0_.is_hiphen) and not tt.next0_.is_whitespace_after and (isinstance(tt.next0_.next0_, TextToken))) and tt.next0_.next0_.length_char > 2): 
            tt = tt.next0_.next0_
            t = tt
            t1 = t
            npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
            if (npt1 is not None and npt1.end_char > tt.end_char): 
                tt = npt1.end_token
                t = tt
                t1 = t
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        if ((isinstance(tt, NumberToken)) and tt.previous is not None): 
            if (tt.previous.is_value("USB", None)): 
                t = tt
                t1 = t
                if (len(internals_) == 0): 
                    name_.end_token = t1
                # Glue everything written without spaces after the number, up to ',' or ':'.
                ttt = tt.next0_
                while ttt is not None: 
                    if (ttt.is_whitespace_before): 
                        break
                    if (ttt.is_char_of(",:")): 
                        break
                    tt = ttt
                    t = tt
                    t1 = t
                    if (len(internals_) == 0): 
                        name_.end_token = t1
                    ttt = ttt.next0_
                continue
        mt0 = NumbersWithUnitToken.try_parse(tt, add_units, False, False, False, False)
        if (mt0 is not None): 
            # A number with units ends the walk unless a longer noun phrase starts here.
            npt1 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
            if (npt1 is not None and npt1.end_char > mt0.end_char): 
                tt = npt1.end_token
                t = tt
                t1 = t
                if (len(internals_) == 0): 
                    name_.end_token = t1
                continue
            break
        if (((tt.is_comma or tt.is_char('('))) and tt.next0_ is not None): 
            www = NumbersWithUnitToken._try_parsewhl(tt.next0_)
            if (www is not None): 
                whd = www
                tt = www.end_token
                t = tt
                t1 = t
                if (tt.next0_ is not None and tt.next0_.is_comma): 
                    tt = tt.next0_
                    t1 = tt
                if (tt.next0_ is not None and tt.next0_.is_char(')')): 
                    tt = tt.next0_
                    t1 = tt
                    continue
            uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
            if (uu is not None): 
                t = uu[len(uu) - 1].end_token
                t1 = t
                units = uu
                if (tt.is_char('(') and t1.next0_ is not None and t1.next0_.is_char(')')): 
                    tt = t1.next0_
                    t = tt
                    t1 = t
                    continue
                elif (t1.next0_ is not None and t1.next0_.is_char('(')): 
                    uu = UnitToken.try_parse_list(t1.next0_.next0_, add_units, False)
                    if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.is_char(')')): 
                        units2 = uu
                        tt = uu[len(uu) - 1].end_token.next0_
                        t = tt
                        t1 = t
                        continue
                    www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
                    if (www is not None): 
                        whd = www
                        tt = www.end_token
                        t = tt
                        t1 = t
                        continue
                if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): 
                    break
                if (t1.next0_ is not None): 
                    if (t1.next0_.is_table_control_char or t1.is_newline_after): 
                        break
                # Doubtful units that were not confirmed above are dropped.
                units = (None)
        if (BracketHelper.can_be_start_of_sequence(tt, False, False) and not (isinstance(tt.next0_, NumberToken))): 
            br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
            if (br is not None): 
                tt = br.end_token
                t = tt
                t1 = t
                continue
        if (tt.is_value("НЕ", None) and tt.next0_ is not None): 
            mc = tt.next0_.get_morph_class_in_dictionary()
            if (mc.is_adverb or mc.is_misc): 
                break
            continue
        # NOTE(review): no-op branch, looks like a leftover debugging hook.
        if (tt.is_value("ЯМЗ", None)): 
            pass
        npt2 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None)
        if (npt2 is None): 
            if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): 
                to = NumbersWithUnitToken.M_TERMINS.try_parse(tt, TerminParseAttr.NO)
                if (to is not None): 
                    if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): 
                        pass
                    else: 
                        break
                t1 = tt
                continue
            mc = tt.get_morph_class_in_dictionary()
            if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): 
                uu = UnitToken.try_parse_list(tt, add_units, False)
                if (uu is not None): 
                    if (uu[0].length_char > 1 or len(uu) > 1): 
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
                t = tt
                t1 = t
                if (len(internals_) == 0): 
                    name_.end_token = tt
                continue
            if (tt.is_comma): 
                continue
            if (tt.is_char('.')): 
                if (not MiscHelper.can_be_start_of_sentence(tt.next0_)): 
                    continue
                uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
                if (uu is not None): 
                    if (uu[0].length_char > 2 or len(uu) > 1): 
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
            break
        tt = npt2.end_token
        t = tt
        t1 = t
        if (len(internals_) > 0): 
            pass
        elif (t.is_value("ПРЕДЕЛ", None) or t.is_value("ГРАНИЦА", None) or t.is_value("ДИАПАЗОН", None)): 
            pass
        elif (t.chars.is_letter): 
            name_.end_token = t1
    # --- 3. separators between the name and the value ----------------------
    t11 = t1
    t1 = t1.next0_
    first_pass3306 = True
    while True:
        if first_pass3306: first_pass3306 = False
        else: t1 = t1.next0_
        if (not (t1 is not None)): break
        if (t1.is_table_control_char): 
            pass
        elif (t1.is_char_of(":,_")): 
            if (is_resctriction): 
                return None
            www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
            if (www is not None): 
                whd = www
                t = www.end_token
                t1 = t
                continue
            uu = UnitToken.try_parse_list(t1.next0_, add_units, False)
            if (uu is not None): 
                if (uu[0].length_char > 1 or len(uu) > 1): 
                    units = uu
                    t = uu[len(uu) - 1].end_token
                    t1 = t
                    continue
            if (t1.is_char(':')): 
                # "<name>: <sub-measure>; <sub-measure>; ..." - collect the list
                # of sub-values and return a set result if there are at least two.
                li = list()
                ttt = t1.next0_
                first_pass3307 = True
                while True:
                    if first_pass3307: first_pass3307 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None)): break
                    if (ttt.is_hiphen or ttt.is_table_control_char): 
                        continue
                    if ((isinstance(ttt, TextToken)) and not ttt.chars.is_letter): 
                        continue
                    mt1 = MeasureToken.try_parse(ttt, add_units, True, True, False, True)
                    if (mt1 is None): 
                        break
                    li.append(mt1)
                    ttt = mt1.end_token
                    if (ttt.next0_ is not None and ttt.next0_.is_char(';')): 
                        ttt = ttt.next0_
                    if (ttt.is_char(';')): 
                        pass
                    elif (ttt.is_newline_after and mt1.is_newline_before): 
                        pass
                    else: 
                        break
                if (len(li) > 1): 
                    res0 = MeasureToken._new1618(t0, li[len(li) - 1].end_token, li, True)
                    if (internals_ is not None and len(internals_) > 0): 
                        res0.internal_ex = internals_[0]
                    nam = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                    li[0].begin_token = t0
                    for v in li: 
                        v.name = "{0} ({1})".format(nam, Utils.ifNotNull(v.name, "")).strip()
                        if (v.nums is not None and len(v.nums.units) == 0 and units is not None): 
                            v.nums.units = units
                    return res0
        elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): 
            pass
        elif (t1.is_hiphen and t1.next0_ is not None and t1.next0_.is_char('(')): 
            pass
        else: 
            break
    if (t1 is None): 
        return None
    # --- 4. the numeric value(s) -------------------------------------------
    mts = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, not0_, True, is_resctriction)
    if (mts is None): 
        if (units is not None and len(units) > 0): 
            # NOTE(review): t1 cannot be None here because of the guard above, so
            # the "t1 is None" alternatives below look unreachable - confirm.
            if (t1 is None or t1.previous.is_char(':')): 
                mts = list()
                if (t1 is None): 
                    t1 = t11
                    while t1 is not None and t1.next0_ is not None: 
                        pass
                        t1 = t1.next0_
                else: 
                    t1 = t1.previous
                # Units without a number: a placeholder value of NaN is stored.
                mts.append(NumbersWithUnitToken._new1619(t0, t1, math.nan))
    if (mts is None): 
        return None
    mt = mts[0]
    if (mt.begin_token == mt.end_token and not (isinstance(mt.begin_token, NumberToken))): 
        return None
    if (not is_subval and name_.begin_token.morph.class0_.is_preposition): 
        name_.begin_token = name_.begin_token.next0_
    if (mt.whl is not None): 
        whd = mt.whl
    # Trim a trailing dimension header / unit list off the name (bounded retries).
    for kk in range(10):
        if (whd is not None and whd.end_token == name_.end_token): 
            name_.end_token = whd.begin_token.previous
            continue
        if (units is not None): 
            if (units[len(units) - 1].end_token == name_.end_token): 
                name_.end_token = units[0].begin_token.previous
                continue
        break
    if (len(mts) > 1 and len(internals_) == 0): 
        # Several values: each becomes an internal MeasureToken of the result.
        if (len(mt.units) == 0): 
            if (units is not None): 
                for m in mts: 
                    m.units = units
        res1 = MeasureToken._new1620(t0, mts[len(mts) - 1].end_token, name_.morph, True)
        res1.name = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
        k = 0
        while k < len(mts): 
            ttt = MeasureToken._new1612(mts[k].begin_token, mts[k].end_token, mts[k])
            if (whd is not None): 
                # assumes whd.tag is a list of per-value names - TODO confirm
                nams = Utils.asObjectOrNull(whd.tag, list)
                if (k < len(nams)): 
                    ttt.name = nams[k]
            res1.internals.append(ttt)
            k += 1
        tt1 = res1.end_token.next0_
        if (tt1 is not None and tt1.is_char('±')): 
            nn = NumbersWithUnitToken._try_parse(tt1, add_units, True, False, False)
            if (nn is not None and nn.plus_minus_percent): 
                res1.end_token = nn.end_token
                res1.nums = nn
                if (len(nn.units) > 0 and units is None and len(mt.units) == 0): 
                    for m in mts: 
                        m.units = nn.units
        return res1
    if (not mt.is_whitespace_before): 
        if (mt.begin_token.previous is None): 
            return None
        if (mt.begin_token.previous.is_char_of(":),") or mt.begin_token.previous.is_table_control_char or mt.begin_token.previous.is_value("IP", None)): 
            pass
        elif (mt.begin_token.is_hiphen and len(mt.units) > 0 and not mt.units[0].is_doubt): 
            pass
        else: 
            return None
    if (len(mt.units) == 0 and units is not None): 
        mt.units = units
        if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): 
            # Move the units from the first one with power -1 onwards to the
            # divisor (with the sign of the power flipped) and cut them off mt.units.
            i = 1
            while i < len(units): 
                if (units[i].pow0_ == -1): 
                    j = i
                    while j < len(units): 
                        mt.div_num.units.append(units[j])
                        units[j].pow0_ = (- units[j].pow0_)
                        j += 1
                    del mt.units[i:i+len(units) - i]
                    break
                i += 1
    if ((minmax < 0) and mt.single_val is not None): 
        mt.from_val = mt.single_val
        mt.from_include = True
        mt.single_val = (None)
    if (minmax > 0 and mt.single_val is not None): 
        mt.to_val = mt.single_val
        mt.to_include = True
        mt.single_val = (None)
    if (len(mt.units) == 0): 
        units = UnitToken.try_parse_list(mt.end_token.next0_, add_units, True)
        if (units is None): 
            if (can_units_absent): 
                pass
            else: 
                return None
        else: 
            mt.units = units
    res = MeasureToken._new1622(t0, mt.end_token, name_.morph, internals_)
    # "xxx-<name>" written without spaces: pull the prefix before the hyphen into the name.
    if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))): 
        name_.begin_token = res.begin_token = name_.begin_token.previous.previous
    res.name = MiscHelper.get_text_value_of_meta_token(name_, (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE if not is_subval else GetTextAttr.NO))
    res.nums = mt
    for u in res.nums.units: 
        if (u.keyword is not None): 
            if (u.keyword.begin_char >= res.begin_char): 
                res.reliable = True
    res.__parse_internals(add_units)
    if (len(res.internals) > 0 or not can_be_set): 
        return res
    # A directly following single value with different units turns the result into a set.
    t1 = res.end_token.next0_
    if (t1 is not None and t1.is_comma_and): 
        t1 = t1.next0_
    mts1 = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, False, False, False)
    if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.can_be_equals(mts[0].units, mts1[0].units)): 
        res.is_set = True
        res.nums = (None)
        res.internals.append(MeasureToken._new1612(mt.begin_token, mt.end_token, mt))
        res.internals.append(MeasureToken._new1612(mts1[0].begin_token, mts1[0].end_token, mts1[0]))
        res.end_token = mts1[0].end_token
    return res
def __tryParseThesis(t: 'Token') -> 'ReferentToken':
    """Try to read an assertion of the form ``[preamble,] <noun phrase> <verb> ...``.

    The noun phrase becomes the term and the text from the first verb to the
    end of the sentence becomes the value of a DefinitionReferent of kind
    ASSERTATION. The checks used (article detection, "LET", "SO-CALLED") are
    English-specific.

    Args:
        t: token at which the assertion may start.

    Returns:
        A ReferentToken spanning the start token up to the sentence end, or
        None when the pattern does not match, the value text is shorter than
        15 characters, or no term text can be extracted.
    """
    if t is None:
        return None
    t0 = t
    tt = t
    mc = tt.getMorphClassInDictionary()
    preamb = None
    if mc.is_conjunction:
        return None
    if t.isValue("LET", None):
        return None
    if mc.is_preposition or mc.is_misc or mc.is_adverb:
        if not MiscHelper.isEngArticle(tt):
            # Skip an introductory part that must end with a comma.
            tt = tt.next0_
            first_step = True
            while True:
                # C-style "for" emulation: the step is skipped on the first pass only.
                if first_step:
                    first_step = False
                else:
                    tt = tt.next0_
                if tt is None:
                    break
                if tt.is_comma:
                    break
                if tt.isChar('('):
                    br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100)
                    if br is not None:
                        tt = br.end_token
                        continue
                if MiscHelper.canBeStartOfSentence(tt):
                    break
                npt0 = NounPhraseHelper.tryParse(
                    tt,
                    Utils.valToEnum(
                        (NounPhraseParseAttr.PARSENUMERICASADJECTIVE) |
                        (NounPhraseParseAttr.REFERENTCANBENOUN),
                        NounPhraseParseAttr), 0)
                if npt0 is not None:
                    tt = npt0.end_token
                    continue
                if tt.getMorphClassInDictionary().is_verb:
                    break
            if tt is None or not tt.is_comma or tt.next0_ is None:
                return None
            preamb = MetaToken(t0, tt.previous)
            tt = tt.next0_
    t1 = tt
    mc = tt.getMorphClassInDictionary()
    npt = NounPhraseHelper.tryParse(
        tt,
        Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) |
                        (NounPhraseParseAttr.REFERENTCANBENOUN) |
                        (NounPhraseParseAttr.PARSEADVERBS),
                        NounPhraseParseAttr), 0)
    if npt is None and isinstance(tt, TextToken):
        # Fallback: a single upper-case word, or a capitalised word that is a
        # proper name or follows a preamble, is taken as the term.
        if tt.chars.is_all_upper:
            npt = NounPhraseToken(tt, tt)
        elif not tt.chars.is_all_lower:
            if mc.is_proper or preamb is not None:
                npt = NounPhraseToken(tt, tt)
    if npt is None:
        return None
    if mc.is_personal_pronoun:
        return None
    t2 = npt.end_token.next0_
    if (t2 is None or MiscHelper.canBeStartOfSentence(t2)
            or not isinstance(t2, TextToken)):
        return None
    if not t2.getMorphClassInDictionary().is_verb:
        return None
    t3 = t2
    # Skip the remaining verbs of the verb group.
    tt = t2.next0_
    while tt is not None:
        if not tt.getMorphClassInDictionary().is_verb:
            break
        tt = tt.next0_
    # Find the end of the sentence (t3).
    first_step = True
    while True:
        if first_step:
            first_step = False
        else:
            tt = tt.next0_
        if tt is None:
            break
        if tt.next0_ is None:
            t3 = tt
            break
        if tt.isCharOf(".;!?"):
            if MiscHelper.canBeStartOfSentence(tt.next0_):
                t3 = tt
                break
        if not isinstance(tt, TextToken):
            continue
        if BracketHelper.canBeStartOfSequence(tt, False, False):
            br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100)
            if br is not None:
                tt = br.end_token
                continue
    tt = t3
    if t3.isCharOf(";.!?"):
        tt = tt.previous
    txt = MiscHelper.getTextValue(
        t2, tt,
        Utils.valToEnum(
            (GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES),
            GetTextAttr))
    if txt is None or len(txt) < 15:
        return None
    if t0 != t1:
        # Append the preamble text (without its trailing comma) to the value.
        tt = t1.previous
        if tt.is_comma:
            tt = tt.previous
        txt0 = MiscHelper.getTextValue(
            t0, tt,
            Utils.valToEnum(
                (GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES),
                GetTextAttr))
        if txt0 is not None and len(txt0) > 10:
            if t0.chars.is_capital_upper:
                txt0 = str.lower(txt0[0]) + txt0[1:]
            txt = "{0}, {1}".format(txt, txt0)
    tt = t1
    if MiscHelper.isEngArticle(tt):
        tt = tt.next0_
    nam = MiscHelper.getTextValue(tt, t2.previous, GetTextAttr.KEEPQUOTES)
    if nam is None:
        # getTextValue results are None-checked above (txt, txt0); without
        # a term text there is nothing to define, so give up instead of failing
        # on nam.startswith below.
        return None
    if nam.startswith("SO-CALLED"):
        nam = nam[9:].strip()
    dr = DefinitionReferent()
    dr.kind = DefinitionKind.ASSERTATION
    dr.addSlot(DefinitionReferent.ATTR_TERMIN, nam, False, 0)
    dr.addSlot(DefinitionReferent.ATTR_VALUE, txt, False, 0)
    return ReferentToken(dr, t0, t3)
def __tryParseRu(first: 'Token', typ: 'NounPhraseParseAttr', max_char_pos: int) -> 'NounPhraseToken':
    """Try to read a noun phrase (adjectives + noun) starting at ``first``.

    Presumably the Russian-language variant (name suffix ``Ru``, Cyrillic
    literals). Candidate items are collected token by token, the noun is chosen
    by scanning the items from the end, and the remaining items are checked for
    morphological accord with it. The function calls itself recursively through
    ``_NounPraseHelperInt.__tryParseRu`` in three places.

    Args:
        first: token at which the phrase may begin.
        typ: parse flags; CANNOTHASCOMMAAND, IGNOREBRACKETS, REFERENTCANBENOUN,
            PARSEADVERBS, MULTILINES, ADJECTIVECANBELAST, PARSEPRONOUNS and
            IGNOREADJBEST are consulted here.
        max_char_pos: when positive, tokens starting beyond this character
            position are not examined.

    Returns:
        A NounPhraseToken, or None when no acceptable phrase is found.
    """
    if (first is None):
        return None
    items = None
    adverbs = None
    internal_noun_prase = None
    conj_before = False
    # --- 1. collect candidate items ----------------------------------------
    # The "first_pass" flag emulates a C-style for loop: the step "t = t.next0_"
    # runs on every pass except the first one, including after "continue".
    t = first
    first_pass2788 = True
    while True:
        if first_pass2788: first_pass2788 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (max_char_pos > 0 and t.begin_char > max_char_pos):
            break
        if ((t.morph.class0_.is_conjunction and not t.morph.class0_.is_adjective and not t.morph.class0_.is_pronoun) and not t.morph.class0_.is_noun):
            if (conj_before):
                break
            if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) != (NounPhraseParseAttr.NO)):
                break
            if (items is not None and t.is_and):
                conj_before = True
                # "and/or" written with a slash is consumed as one conjunction.
                if ((t.next0_ is not None and t.next0_.isCharOf("\\/") and t.next0_.next0_ is not None) and t.next0_.next0_.is_or):
                    t = t.next0_.next0_
                continue
            break
        elif (t.is_comma):
            if (conj_before or items is None):
                break
            if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) != (NounPhraseParseAttr.NO)):
                break
            mc = t.previous.getMorphClassInDictionary()
            if (mc.is_proper_surname or mc.is_proper_secname):
                break
            conj_before = True
            continue
        elif (t.isChar('(')):
            if (items is None):
                return None
            if ((((typ) & (NounPhraseParseAttr.IGNOREBRACKETS))) != (NounPhraseParseAttr.IGNOREBRACKETS)):
                break
            # With IGNOREBRACKETS a bracket group of up to 100 characters is skipped.
            brr = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
            if (brr is None):
                break
            if (brr.length_char > 100):
                break
            t = brr.end_token
            continue
        if (isinstance(t, ReferentToken)):
            if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (NounPhraseParseAttr.NO)):
                break
        elif (t.chars.is_latin_letter):
            break
        it = NounPhraseItem.tryParse(t, items, typ)
        if (it is None or ((not it.can_be_adj and not it.can_be_noun))):
            if ((((typ) & (NounPhraseParseAttr.PARSEADVERBS))) != (NounPhraseParseAttr.NO) and (isinstance(t, TextToken)) and t.morph.class0_.is_adverb):
                # A leading adverb is accepted only right after a preposition.
                if (items is None):
                    if (t.previous is not None and t.previous.morph.class0_.is_preposition):
                        pass
                    else:
                        return None
                if (adverbs is None):
                    adverbs = list()
                adverbs.append(Utils.asObjectOrNull(t, TextToken))
                continue
            break
        it.conj_before = conj_before
        conj_before = False
        if (not it.can_be_adj and not it.can_be_noun):
            break
        if (t.is_newline_before and t != first):
            if ((((typ) & (NounPhraseParseAttr.MULTILINES))) != (NounPhraseParseAttr.NO)):
                pass
            elif (items is not None and t.chars != items[len(items) - 1].chars):
                if (t.chars.is_all_lower and items[len(items) - 1].chars.is_capital_upper):
                    pass
                else:
                    break
        if (items is None):
            items = list()
        else:
            it0 = items[len(items) - 1]
            if (it0.can_be_noun and it0.is_personal_pronoun):
                if (it.is_pronoun):
                    break
                if ((it0.begin_token.previous is not None and it0.begin_token.previous.getMorphClassInDictionary().is_verb and not it0.begin_token.previous.getMorphClassInDictionary().is_adjective) and not it0.begin_token.previous.getMorphClassInDictionary().is_preposition):
                    if (t.morph.case_.is_nominative or t.morph.case_.is_accusative):
                        pass
                    else:
                        break
                if (it.can_be_noun and it.is_verb):
                    break
        items.append(it)
        t = it.end_token
        if (t.is_newline_after and not t.chars.is_all_lower):
            mc = t.getMorphClassInDictionary()
            if (mc.is_proper_surname):
                break
            if (t.morph.class0_.is_proper_surname and mc.is_undefined):
                break
    if (items is None):
        return None
    # --- 2. single adjective followed by "and/or <phrase>" -----------------
    if (len(items) == 1 and items[0].can_be_adj):
        and0_ = False
        tt1 = items[0].end_token.next0_
        first_pass2789 = True
        while True:
            if first_pass2789: first_pass2789 = False
            else: tt1 = tt1.next0_
            if (not (tt1 is not None)): break
            if (tt1.is_and or tt1.is_or):
                and0_ = True
                break
            if (tt1.is_comma or tt1.isValue("НО", None) or tt1.isValue("ТАК", None)):
                continue
            break
        if (and0_):
            if (items[0].can_be_noun and items[0].is_personal_pronoun):
                and0_ = False
        if (and0_):
            tt2 = tt1.next0_
            if (tt2 is not None and tt2.morph.class0_.is_preposition):
                tt2 = tt2.next0_
            npt1 = _NounPraseHelperInt.__tryParseRu(tt2, typ, max_char_pos)
            if (npt1 is not None and len(npt1.adjectives) > 0):
                ok1 = False
                for av in items[0].adj_morph:
                    for v in (npt1.noun).noun_morph:
                        if (v.checkAccord(av, False)):
                            items[0].morph.addItem(av)
                            ok1 = True
                if (ok1):
                    # The phrase found after the conjunction is re-used, but its
                    # span and adjective list are replaced by the single leading item.
                    npt1.begin_token = items[0].begin_token
                    npt1.end_token = tt1.previous
                    npt1.adjectives.clear()
                    npt1.adjectives.append(items[0])
                    return npt1
    # --- 3. adjectives separated from the noun by an inner phrase ----------
    last1 = items[len(items) - 1]
    check = True
    for it in items:
        if (not it.can_be_adj):
            check = False
            break
        elif (it.can_be_noun and it.is_personal_pronoun):
            check = False
            break
    tt1 = last1.end_token.next0_
    if ((tt1 is not None and check and ((tt1.morph.class0_.is_preposition or tt1.morph.case_.is_instrumental))) and (tt1.whitespaces_before_count < 2)):
        inp = NounPhraseHelper.tryParse(tt1, Utils.valToEnum((typ) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), max_char_pos)
        if (inp is not None):
            tt1 = inp.end_token.next0_
            npt1 = _NounPraseHelperInt.__tryParseRu(tt1, typ, max_char_pos)
            if (npt1 is not None):
                ok = True
                for it in items:
                    if (not NounPhraseItem.tryAccordAdjAndNoun(it, Utils.asObjectOrNull(npt1.noun, NounPhraseItem))):
                        ok = False
                        break
                if (ok):
                    i = 0
                    while i < len(items):
                        npt1.adjectives.insert(i, items[i])
                        i += 1
                    npt1.internal_noun = inp
                    mmm = MorphCollection(npt1.morph)
                    for it in items:
                        mmm.removeItems(it.adj_morph[0], False)
                    if (mmm.gender != MorphGender.UNDEFINED or mmm.number != MorphNumber.UNDEFINED or not mmm.case_.is_undefined):
                        npt1.morph = mmm
                    if (adverbs is not None):
                        if (npt1.adverbs is None):
                            npt1.adverbs = adverbs
                        else:
                            npt1.adverbs[0:0] = adverbs
                    return npt1
            if (tt1 is not None and tt1.morph.class0_.is_noun):
                it = NounPhraseItem.tryParse(tt1, items, typ)
                if (it is not None and it.can_be_noun):
                    internal_noun_prase = inp
                    inp.begin_token = items[0].end_token.next0_
                    items.append(it)
    # --- 4. adjective placed after a lone noun / personal pronoun ----------
    ok2 = False
    if ((len(items) == 1 and (((typ) & (NounPhraseParseAttr.ADJECTIVECANBELAST))) != (NounPhraseParseAttr.NO) and (items[0].whitespaces_after_count < 3)) and not items[0].is_adverb):
        if (not items[0].can_be_adj):
            ok2 = True
        elif (items[0].is_personal_pronoun and items[0].can_be_noun):
            ok2 = True
    if (ok2):
        it = NounPhraseItem.tryParse(items[0].end_token.next0_, None, typ)
        if (it is not None and it.can_be_adj and it.begin_token.chars.is_all_lower):
            ok2 = True
            if (it.is_adverb or it.is_verb):
                ok2 = False
            if (it.is_pronoun and items[0].is_pronoun):
                ok2 = False
                if (it.can_be_adj_for_personal_pronoun and items[0].is_personal_pronoun):
                    ok2 = True
            if (ok2 and NounPhraseItem.tryAccordAdjAndNoun(it, items[0])):
                # The trailing adjective is taken only if it does not start a
                # longer phrase of its own.
                npt1 = _NounPraseHelperInt.__tryParseRu(it.begin_token, typ, max_char_pos)
                if (npt1 is not None and ((npt1.end_char > it.end_char or len(npt1.adjectives) > 0))):
                    pass
                else:
                    items.insert(0, it)
    # --- 5. choose the noun (scan from the end) and cut the tail off -------
    noun = None
    # NOTE(review): adj_after is never assigned a non-None value in this block.
    adj_after = None
    for i in range(len(items) - 1, -1, -1):
        if (items[i].can_be_noun):
            if (items[i].conj_before):
                continue
            if (i > 0 and not items[i - 1].can_be_adj):
                continue
            if (i > 0 and items[i - 1].can_be_noun):
                if (items[i - 1].is_doubt_adjective):
                    continue
                if (items[i - 1].is_pronoun and items[i].is_pronoun):
                    if (items[i].is_pronoun and items[i - 1].can_be_adj_for_personal_pronoun):
                        pass
                    else:
                        continue
            noun = items[i]
            del items[i:i + len(items) - i]
            if (adj_after is not None):
                items.append(adj_after)
            break
    if (noun is None):
        return None
    res = NounPhraseToken(first, noun.end_token)
    if (adverbs is not None):
        for a in adverbs:
            if (a.begin_char < noun.begin_char):
                if (res.adverbs is None):
                    res.adverbs = list()
                res.adverbs.append(a)
    res.noun = (noun)
    res.internal_noun = internal_noun_prase
    for v in noun.noun_morph:
        noun.morph.addItem(v)
    res.morph = noun.morph
    # Directly after a preposition the nominative case is toggled off.
    if (res.morph.case_.is_nominative and first.previous is not None and first.previous.morph.class0_.is_preposition):
        res.morph.case_ = (res.morph.case_) ^ MorphCase.NOMINATIVE
    if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO) and ((res.morph.class0_.is_pronoun or res.morph.class0_.is_personal_pronoun))):
        return None
    # stat counts the last letters of accorded adjective normal forms; it is
    # used near the end to reorder the two leading morph variants of each item.
    stat = None
    if (len(items) > 1):
        stat = dict()
    need_update_morph = False
    # --- 6. keep only the noun variants that accord with every adjective ---
    if (len(items) > 0):
        ok_list = list()
        is_num_not = False
        for vv in noun.noun_morph:
            v = vv
            i = 0
            while i < len(items):
                ok = False
                for av in items[i].adj_morph:
                    if (v.checkAccord(av, False)):
                        ok = True
                        # Narrow both sides to the cases they share.
                        if (not ((av.case_) & v.case_).is_undefined and av.case_ != v.case_):
                            v.case_ = av.case_ = (av.case_) & v.case_
                        break
                if (not ok):
                    if (items[i].can_be_numeric_adj and items[i].tryAccordVar(v)):
                        ok = True
                        v = (Utils.asObjectOrNull(v.clone(), NounPhraseItemTextVar))
                        v.number = MorphNumber.PLURAL
                        is_num_not = True
                        v.case_ = MorphCase()
                        for a in items[i].adj_morph:
                            v.case_ = (v.case_) | a.case_
                    else:
                        break
                i += 1
            if (i >= len(items)):
                ok_list.append(v)
        if (len(ok_list) > 0 and (((len(ok_list) < res.morph.items_count) or is_num_not))):
            res.morph = MorphCollection()
            for v in ok_list:
                res.morph.addItem(v)
            if (not is_num_not):
                noun.morph = res.morph
    # --- 7. register the adjectives ----------------------------------------
    i = 0
    first_pass2790 = True
    while True:
        if first_pass2790: first_pass2790 = False
        else: i += 1
        if (not (i < len(items))): break
        for av in items[i].adj_morph:
            for v in noun.noun_morph:
                if (v.checkAccord(av, False)):
                    if (not ((av.case_) & v.case_).is_undefined and av.case_ != v.case_):
                        v.case_ = av.case_ = (av.case_) & v.case_
                        need_update_morph = True
                    items[i].morph.addItem(av)
                    if (stat is not None and len(av.normal_value) > 1):
                        last = av.normal_value[len(av.normal_value) - 1]
                        if (not last in stat):
                            stat[last] = 1
                        else:
                            stat[last] += 1
        if (items[i].is_pronoun or items[i].is_personal_pronoun):
            res.anafor = items[i].begin_token
            if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)):
                continue
        tt = Utils.asObjectOrNull(items[i].begin_token, TextToken)
        if (tt is not None and not tt.term.startswith("ВЫСШ")):
            err = False
            for wf in tt.morph.items:
                if (wf.class0_.is_adjective):
                    if (wf.containsAttr("прев.", None)):
                        if ((((typ) & (NounPhraseParseAttr.IGNOREADJBEST))) != (NounPhraseParseAttr.NO)):
                            err = True
                    if (wf.containsAttr("к.ф.", None) and tt.morph.class0_.is_personal_pronoun):
                        return None
            if (err):
                continue
        if (res.morph.case_.is_nominative):
            # If the source text differs from the normalised form, add the text
            # itself as an extra adjective variant.
            v = MiscHelper.getTextValueOfMetaToken(items[i], GetTextAttr.KEEPQUOTES)
            if (not Utils.isNullOrEmpty(v)):
                if (items[i].getNormalCaseText(None, False, MorphGender.UNDEFINED, False) != v):
                    wf = NounPhraseItemTextVar(items[i].morph, None)
                    wf.normal_value = v
                    wf.class0_ = MorphClass.ADJECTIVE
                    wf.case_ = res.morph.case_
                    if (res.morph.case_.is_prepositional or res.morph.gender == MorphGender.NEUTER or res.morph.gender == MorphGender.FEMINIE):
                        items[i].morph.addItem(wf)
                    else:
                        items[i].morph.insertItem(0, wf)
        res.adjectives.append(items[i])
        if (items[i].end_char > res.end_char):
            res.end_token = items[i].end_token
    # --- 8. layout sanity checks between neighbouring adjectives -----------
    i = 0
    first_pass2791 = True
    while True:
        if first_pass2791: first_pass2791 = False
        else: i += 1
        if (not (i < (len(res.adjectives) - 1))): break
        if (res.adjectives[i].whitespaces_after_count > 5):
            if (res.adjectives[i].chars != res.adjectives[i + 1].chars):
                if (not res.adjectives[i + 1].chars.is_all_lower):
                    return None
                if (res.adjectives[i].chars.is_all_upper and res.adjectives[i + 1].chars.is_capital_upper):
                    return None
                if (res.adjectives[i].chars.is_capital_upper and res.adjectives[i + 1].chars.is_all_upper):
                    return None
            if (res.adjectives[i].whitespaces_after_count > 10):
                if (res.adjectives[i].newlines_after_count == 1):
                    if (res.adjectives[i].chars.is_capital_upper and i == 0 and res.adjectives[i + 1].chars.is_all_lower):
                        continue
                    if (res.adjectives[i].chars == res.adjectives[i + 1].chars):
                        continue
                return None
    if (need_update_morph):
        noun.morph = MorphCollection()
        for v in noun.noun_morph:
            noun.morph.addItem(v)
        res.morph = noun.morph
    # --- 9. comma / "and" consistency between adjectives -------------------
    if (len(res.adjectives) > 0):
        if (noun.begin_token.previous is not None):
            if (noun.begin_token.previous.is_comma_and):
                if (res.adjectives[0].begin_char > noun.begin_char):
                    pass
                else:
                    return None
        # zap = commas, and0_ = "and" conjunctions, cou = non-pronoun adjectives
        # (all counted over every adjective except the last one).
        zap = 0
        and0_ = 0
        cou = 0
        last_and = False
        i = 0
        while i < (len(res.adjectives) - 1):
            te = res.adjectives[i].end_token.next0_
            if (te is None):
                return None
            if (te.isChar('(')):
                pass
            elif (te.is_comma):
                zap += 1
            elif (te.is_and):
                and0_ += 1
                if (i == (len(res.adjectives) - 2)):
                    last_and = True
            if (not res.adjectives[i].begin_token.morph.class0_.is_pronoun):
                cou += 1
            i += 1
        if ((zap + and0_) > 0):
            if (and0_ > 1):
                return None
            elif (and0_ == 1 and not last_and):
                return None
            if ((zap + and0_) != cou):
                if (and0_ == 1):
                    pass
                else:
                    return None
            last = Utils.asObjectOrNull(res.adjectives[len(res.adjectives) - 1], NounPhraseItem)
            if (last.is_pronoun and not last_and):
                return None
    # --- 10. prefer the adjective variant whose ending is more frequent ----
    if (stat is not None):
        for adj in items:
            if (adj.morph.items_count > 1):
                w1 = Utils.asObjectOrNull(adj.morph.getIndexerItem(0), NounPhraseItemTextVar)
                w2 = Utils.asObjectOrNull(adj.morph.getIndexerItem(1), NounPhraseItemTextVar)
                if ((len(w1.normal_value) < 2) or (len(w2.normal_value) < 2)):
                    break
                l1 = w1.normal_value[len(w1.normal_value) - 1]
                l2 = w2.normal_value[len(w2.normal_value) - 1]
                i1 = 0
                i2 = 0
                wrapi1534 = RefOutArgWrapper(0)
                Utils.tryGetValue(stat, l1, wrapi1534)
                i1 = wrapi1534.value
                wrapi2533 = RefOutArgWrapper(0)
                Utils.tryGetValue(stat, l2, wrapi2533)
                i2 = wrapi2533.value
                if (i1 < i2):
                    adj.morph.removeItem(1)
                    adj.morph.insertItem(0, w2)
    # --- 11. reject phrases whose first word is really a verb --------------
    if (res.begin_token.getMorphClassInDictionary().is_verb and len(items) > 0):
        if (not res.begin_token.chars.is_all_lower or res.begin_token.previous is None):
            pass
        elif (res.begin_token.previous.morph.class0_.is_preposition):
            pass
        else:
            # Look backwards for a noun / proper name separated by a comma that
            # accords with the verb reading of the first word.
            comma = False
            tt = res.begin_token.previous
            first_pass2792 = True
            while True:
                if first_pass2792: first_pass2792 = False
                else: tt = tt.previous
                if (not (tt is not None)): break
                if (tt.morph.class0_.is_adverb):
                    continue
                if (tt.isCharOf(".;")):
                    break
                if (tt.is_comma):
                    comma = True
                    continue
                if (tt.isValue("НЕ", None)):
                    continue
                if (((tt.morph.class0_.is_noun or tt.morph.class0_.is_proper)) and comma):
                    for it in res.begin_token.morph.items:
                        if (it.class0_.is_verb and (isinstance(it, MorphWordForm))):
                            if (tt.morph.checkAccord(it, False)):
                                if (res.morph.case_.is_instrumental):
                                    return None
                                ews = Explanatory.findDerivates((it).normal_case, True, tt.morph.language)
                                if (ews is not None):
                                    for ew in ews:
                                        if (ew.transitive > 0):
                                            if (res.morph.case_.is_genitive):
                                                return None
                                        if (ew.nexts is not None):
                                            wrapcm535 = RefOutArgWrapper(None)
                                            inoutres536 = Utils.tryGetValue(ew.nexts, "", wrapcm535)
                                            cm = wrapcm535.value
                                            if (inoutres536):
                                                if (not ((cm) & res.morph.case_).is_undefined):
                                                    return None
                break
    # --- 12. a single-token phrase that is an adverb is mostly rejected ----
    if (res.begin_token == res.end_token):
        mc = res.begin_token.getMorphClassInDictionary()
        if (mc.is_adverb):
            if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition):
                pass
            elif (mc.is_noun and not mc.is_preposition and not mc.is_conjunction):
                pass
            elif (res.begin_token.isValue("ВЕСЬ", None)):
                pass
            else:
                return None
    return res
def parse_near_items(t: 'Token', t1: 'Token', lev: int, prev: typing.List['SentItem']) -> typing.List['SentItem']:
    """Build the sentence item(s) that start at token ``t``.

    The token is tried, in order, as: a referent, a delimiter, a conjunction,
    a number (optionally preceded by a preposition and followed by a noun
    phrase), a noun phrase and/or verb phrase, an adverb, and finally a bare
    adjective. The first interpretation that succeeds determines the result.

    Args:
        t: first token of the candidate item.
        t1: right boundary; nothing is parsed when ``t`` begins after
            ``t1.end_char``.
        lev: nesting level, passed on as ``lev + 1`` to the sub-sentence and
            participle parsers; values above 100 abort the parse.
        prev: items already parsed before ``t``; scanned backwards for a
            ``ONEOF`` attribute when ``t`` is an adverb of type ``OTHER``.

    Returns:
        A list of ``SentItem`` alternatives/parts, or ``None`` when nothing
        could be recognised (or a guard condition fired).
    """
    # NOTE: the ``SentItem.__xxx`` references below are kept inside this single
    # function on purpose - if this is a class member they depend on private
    # name mangling, which would break in a module-level helper.
    if lev > 100:
        return None
    if t is None or t.begin_char > t1.end_char:
        return None
    res = list()

    # --- Single-token items that need no further analysis -----------------
    if isinstance(t, ReferentToken):
        res.append(SentItem(Utils.asObjectOrNull(t, MetaToken)))
        return res
    delim = DelimToken.try_parse(t)
    if delim is not None:
        res.append(SentItem(delim))
        return res
    conj = ConjunctionHelper.try_parse(t)
    if conj is not None:
        res.append(SentItem(conj))
        return res

    # --- Optional preposition followed by a number -------------------------
    prep_ = PrepositionHelper.try_parse(t)
    t111 = t if prep_ is None else prep_.end_token.next0_
    # A number token whose morphology is adjective-but-not-noun is not
    # treated as a quantity here.
    if (isinstance(t111, NumberToken)
            and t111.morph.class0_.is_adjective
            and not t111.morph.class0_.is_noun):
        t111 = None
    num = (None if t111 is None
           else NumbersWithUnitToken.try_parse(t111, None, False, False, False, False))
    if num is not None:
        if len(num.units) == 0:
            after_num = num.end_token.next0_
            npt1 = NounPhraseHelper.try_parse(after_num, SentItem.__m_npt_attrs, 0, None)
            if npt1 is None and after_num is not None and after_num.is_value("РАЗ", None):
                # "<number> РАЗ": synthesise a one-token noun phrase.
                npt1 = NounPhraseToken(after_num, after_num)
                npt1.noun = MetaToken(after_num, after_num)
            if npt1 is not None and prep_ is not None:
                if npt1.noun.end_token.is_value("РАЗ", None):
                    npt1.morph.remove_items(prep_.next_case, False)
                elif ((npt1.morph.case_) & prep_.next_case).is_undefined:
                    # Noun phrase case is incompatible with the preposition.
                    npt1 = None
                else:
                    npt1.morph.remove_items(prep_.next_case, False)
            if (npt1 is not None
                    and npt1.end_token.is_value("ОНИ", None)
                    and npt1.preposition is not None
                    and npt1.preposition.normal == "ИЗ"):
                # "<number> ИЗ <ОНИ>": the item takes the number's morphology.
                npt1.morph = MorphCollection(num.end_token.morph)
                npt1.preposition = None
                nn = str(num)
                si1 = SentItem(npt1)
                if (nn == "1"
                        and isinstance(num.end_token, NumberToken)
                        and num.end_token.end_token.is_value("ОДИН", None)):
                    a = SemAttribute._new2946(
                        SemAttributeType.ONEOF,
                        num.end_token.end_token.get_normal_case_text(
                            None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False))
                    aex = SemAttributeEx._new2945(num, a)
                    si1.attrs = list()
                    si1.attrs.append(aex)
                else:
                    si1.quant = SemQuantity(nn, num.begin_token, num.end_token)
                if prep_ is not None:
                    si1.prep = prep_.normal
                res.append(si1)
                return res
            if npt1 is not None:
                si1 = SentItem._new2948(
                    npt1, SemQuantity(str(num), num.begin_token, num.end_token))
                if prep_ is not None:
                    si1.prep = prep_.normal
                if npt1.end_token.is_value("РАЗ", None):
                    si1.typ = SentItemType.FORMULA
                if (((npt1.morph.number) & (MorphNumber.PLURAL)) == (MorphNumber.UNDEFINED)
                        and si1.quant.spelling != "1"):
                    spelling = si1.quant.spelling
                    ok = (spelling.endswith("1")
                          or si1.typ == SentItemType.FORMULA
                          or (spelling.endswith(("2", "3", "4"))
                              and npt1.morph.case_.is_genitive))
                    if ok:
                        # Replace the morphology with a bare plural marker.
                        npt1.morph = MorphCollection()
                        npt1.morph.number = MorphNumber.PLURAL
                res.append(si1)
                return res
        # Number with units, or a number not followed by a usable noun phrase:
        # the number itself (including the preposition span) becomes the item.
        num.begin_token = t
        num.morph = MorphCollection(num.end_token.morph)
        si = SentItem(num)
        if prep_ is not None:
            si.prep = prep_.normal
        res.append(si)
        if si.prep == "НА":
            aa = AdverbToken.try_parse(si.end_token.next0_)
            if (aa is not None
                    and (aa.typ == SemAttributeType.LESS or aa.typ == SemAttributeType.GREAT)):
                si.add_attr(aa)
                si.end_token = aa.end_token
        return res

    # --- Noun phrase / adverb / verb phrase --------------------------------
    mc = t.get_morph_class_in_dictionary()
    adv = AdverbToken.try_parse(t)
    npt = NounPhraseHelper.try_parse(t, SentItem.__m_npt_attrs, 0, None)
    if (npt is not None
            and isinstance(npt.end_token, TextToken)
            and npt.end_token.term == "БЫЛИ"):
        npt = None
    if npt is not None and adv is not None:
        if adv.end_char > npt.end_char:
            npt = None
        elif adv.end_char == npt.end_char:
            # Same span: keep both readings.
            res.append(SentItem(npt))
            res.append(SentItem(adv))
            return res
    if npt is not None and len(npt.adjectives) == 0:
        if (npt.end_token.is_value("КОТОРЫЙ", None)
                and t.previous is not None
                and t.previous.is_comma_and):
            res1 = SentItem.__parse_subsent(npt, t1, lev + 1, prev)
            if res1 is not None:
                return res1
        if npt.end_token.is_value("СКОЛЬКО", None):
            tt1 = npt.end_token.next0_
            if tt1 is not None and tt1.is_value("ВСЕГО", None):
                tt1 = tt1.next0_
            npt1 = NounPhraseHelper.try_parse(tt1, NounPhraseParseAttr.NO, 0, None)
            if (npt1 is not None
                    and not npt1.morph.case_.is_undefined
                    and prep_ is not None):
                if ((prep_.next_case) & npt1.morph.case_).is_undefined:
                    npt1 = None
                else:
                    npt1.morph.remove_items(prep_.next_case, False)
            if npt1 is not None:
                # Merge: the following noun phrase absorbs "СКОЛЬКО" as an adjective.
                npt1.begin_token = npt.begin_token
                npt1.preposition = npt.preposition
                npt1.adjectives.append(MetaToken(npt.end_token, npt.end_token))
                npt = npt1
        if npt.end_token.morph.class0_.is_adjective:
            if VerbPhraseHelper.try_parse(t, True, False, False) is not None:
                npt = None

    vrb = None
    if npt is not None and len(npt.adjectives) > 0:
        vrb = VerbPhraseHelper.try_parse(t, True, False, False)
        if vrb is not None and vrb.first_verb.is_participle:
            npt = None
    elif adv is None or npt is not None:
        vrb = VerbPhraseHelper.try_parse(t, True, False, False)
    if npt is not None:
        res.append(SentItem(npt))

    if (vrb is not None
            and not vrb.first_verb.is_participle
            and not vrb.first_verb.is_dee_participle):
        # Collect the dictionary verb readings; each one yields its own item.
        vars0_ = list()
        for wf in vrb.first_verb.morph.items:
            if wf.class0_.is_verb and isinstance(wf, MorphWordForm) and wf.is_in_dictionary:
                vars0_.append(Utils.asObjectOrNull(wf, MorphWordForm))
        if len(vars0_) < 2:
            res.append(SentItem(vrb))
        else:
            vrb.first_verb.verb_morph = vars0_[0]
            res.append(SentItem(vrb))
            for variant in vars0_[1:]:
                # A fresh verb phrase object is needed per reading.
                vrb = VerbPhraseHelper.try_parse(t, False, False, False)
                if vrb is None:
                    break
                vrb.first_verb.verb_morph = variant
                res.append(SentItem(vrb))
            if (vars0_[0].misc.mood == MorphMood.IMPERATIVE
                    and vars0_[1].misc.mood != MorphMood.IMPERATIVE):
                # Put the non-imperative reading ahead of the imperative one.
                res[0], res[1] = res[1], res[0]
        return res

    if vrb is not None:
        res1 = SentItem.__parse_participles(vrb, t1, lev + 1)
        if res1 is not None:
            res.extend(res1)
    if len(res) > 0:
        return res

    # --- Plain adverb ------------------------------------------------------
    if adv is not None:
        if adv.typ == SemAttributeType.OTHER:
            npt1 = NounPhraseHelper.try_parse(
                adv.end_token.next0_, SentItem.__m_npt_attrs, 0, None)
            if (npt1 is not None
                    and npt1.end_token.is_value("ОНИ", None)
                    and npt1.preposition is not None):
                si1 = SentItem(npt1)
                a = SemAttribute._new2946(
                    SemAttributeType.OTHER,
                    adv.end_token.get_normal_case_text(
                        None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
                # The attribute originates from the adverb. ``num`` is always
                # None at this point (the numeric branch above returns on every
                # path), so the adverb is used as the source token - the same
                # way the ONEOF loop below does.
                aex = SemAttributeEx._new2945(adv, a)
                si1.attrs = list()
                si1.attrs.append(aex)
                if prep_ is not None:
                    si1.prep = prep_.normal
                res.append(si1)
                return res
            # Otherwise look back for the nearest item carrying a ONEOF
            # attribute and create an OTHER counterpart from the same source.
            for prev_item in reversed(prev):
                if prev_item.attrs is None:
                    continue
                for prev_attr in prev_item.attrs:
                    if prev_attr.attr.typ == SemAttributeType.ONEOF:
                        si1 = SentItem(prev_item.source)
                        aa = SemAttribute._new2946(
                            SemAttributeType.OTHER,
                            adv.end_token.get_normal_case_text(
                                None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
                        aex = SemAttributeEx._new2945(adv, aa)
                        si1.attrs = list()
                        si1.attrs.append(aex)
                        if prep_ is not None:
                            si1.prep = prep_.normal
                        si1.begin_token = adv.begin_token
                        si1.end_token = adv.end_token
                        res.append(si1)
                        return res
        res.append(SentItem(adv))
        return res

    # --- Bare adjective: wrap it as a one-token noun phrase ----------------
    if mc.is_adjective:
        npt = NounPhraseToken._new2953(t, t, MorphCollection(t.morph))
        npt.noun = MetaToken(t, t)
        res.append(SentItem(npt))
        return res
    return None