def __parseInternals(self, add_units: 'TerminCollection') -> None:
    """Attach one trailing nested measure to this token, if there is one.

    The token right after ``self.end_token`` must be a slash / backslash
    character or the word "ПРИ" (Russian "at" / "under"); otherwise nothing
    happens. What follows that separator is first tried as a full
    ``MeasureToken``; failing that, as a bare ``NumbersWithUnitToken`` which
    is accepted only when it carries units that ``UnitToken.canBeEquals``
    does not match against this token's own units.

    On success the nested token is appended to ``self.internals`` and
    ``self.end_token`` is moved to the nested token's end.

    Args:
        add_units: extra unit collection, passed through to the parsers.
    """
    separator = self.end_token.next0_
    if separator is None:
        return
    if not (separator.isCharOf("\\/") or separator.isValue("ПРИ", None)):
        return

    # First choice: a complete named measure after the separator.
    nested = MeasureToken.tryParse(separator.next0_, add_units, True, False)
    if nested is not None:
        self.internals.append(nested)
        self.end_token = nested.end_token
        return

    # Fallback: a plain number-with-units in a different unit.
    numbers = NumbersWithUnitToken.tryParse(
        separator.next0_, add_units, False, False)
    if numbers is None or len(numbers.units) == 0:
        return
    if UnitToken.canBeEquals(self.nums.units, numbers.units):
        return
    wrapped = MeasureToken._new1506(
        numbers.begin_token, numbers.end_token, numbers)
    self.internals.append(wrapped)
    self.end_token = numbers.end_token
def tryParse(t: 'Token', add_units: 'TerminCollection', can_be_set: bool = True, can_units_absent: bool = False) -> 'MeasureToken':
    """Extract a measure together with its name.

    Starting at ``t`` the function reads a name (normally a noun phrase),
    skips connecting words, then reads one or more numeric values with
    units and packs everything into a ``MeasureToken``.

    Args:
        t(Token): first token of the candidate; must be a ``TextToken``.
        add_units: extra unit collection, passed through to the unit and
            number parsers.
        can_be_set: when False, the final attempt to pair the value with a
            following value in a different unit (``is_set``) is skipped.
        can_units_absent: when True, a value for which no units could be
            found is still accepted.

    Returns:
        The parsed ``MeasureToken``, or None when no measure is recognised.

    NOTE(review): the signature has no ``self``; presumably this is declared
    as a static method outside the visible text - confirm.
    """
    if (not ((isinstance(t, TextToken)))):
        return None
    if (t.is_table_control_char):
        return None
    t0 = t
    whd = None
    # minmax is an "out" value filled in by _isMinOrMax; below, a negative
    # value turns a single value into a lower bound and a positive value
    # into an upper bound.
    minmax = 0
    wrapminmax1516 = RefOutArgWrapper(minmax)
    tt = NumbersWithUnitToken._isMinOrMax(t0, wrapminmax1516)
    minmax = wrapminmax1516.value
    if (tt is not None):
        t = tt.next0_
    # --- Name: a noun phrase, with several fallbacks -----------------------
    npt = NounPhraseHelper.tryParse(
        t,
        Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr),
        0)
    if (npt is None):
        # No noun phrase: accept a _tryParseWHL match, the abbreviation
        # "КПД" (Russian abbreviation for efficiency), or a word longer than
        # 3 characters whose dictionary morph class is undefined.
        whd = NumbersWithUnitToken._tryParseWHL(t)
        if (whd is not None):
            npt = NounPhraseToken(t0, whd.end_token)
        elif (t0.isValue("КПД", None)):
            npt = NounPhraseToken(t0, t0)
        elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.getMorphClassInDictionary().is_undefined):
            npt = NounPhraseToken(t0, t0)
        else:
            return None
    elif (NumberHelper.tryParseRealNumber(t, True) is not None):
        # The candidate name itself reads as a number.
        return None
    else:
        # The candidate name reads as a date item.
        dtok = DateItemToken.tryAttach(t, None)
        if (dtok is not None):
            return None
    t1 = npt.end_token
    t = npt.end_token
    name_ = MetaToken._new561(npt.begin_token, npt.end_token, npt.morph)
    units = None
    # NOTE(review): units2 is assigned below but never read in this function.
    units2 = None
    internals_ = list()
    not0_ = False
    # --- Main scan: walk the tokens between the name and the value ---------
    # Emulates a C-style for loop: the first pass skips the increment.
    tt = t1.next0_
    first_pass3037 = True
    while True:
        if first_pass3037:
            first_pass3037 = False
        else:
            tt = tt.next0_
        if (not (tt is not None)):
            break
        if (tt.is_newline_before):
            break
        if (tt.is_table_control_char):
            break
        # A min/max marker inside the phrase updates minmax and is skipped.
        wrapminmax1510 = RefOutArgWrapper(minmax)
        tt2 = NumbersWithUnitToken._isMinOrMax(tt, wrapminmax1510)
        minmax = wrapminmax1510.value
        if (tt2 is not None):
            tt = tt2
            t = tt
            t1 = t
            continue
        # Linking verbs ("be", "must", "may", "amount to") are skipped; a
        # preceding "НЕ" ("not") sets the negation flag that is later passed
        # to tryParseMulti.
        if ((tt.isValue("БЫТЬ", None) or tt.isValue("ДОЛЖЕН", None) or tt.isValue("ДОЛЖНЫЙ", None)) or tt.isValue("МОЖЕТ", None) or ((tt.isValue("СОСТАВЛЯТЬ", None) and not tt.getMorphClassInDictionary().is_adjective))):
            t = tt
            t1 = t
            if (tt.previous.isValue("НЕ", None)):
                not0_ = True
            continue
        www = NumbersWithUnitToken._tryParseWHL(tt)
        if (www is not None):
            whd = www
            tt = www.end_token
            t = tt
            t1 = t
            continue
        if (len(internals_) > 0 and tt.is_comma_and):
            continue
        # After "ПРИ" ("at"/"under"), or once a nested measure has already
        # been collected, try to read another nested measure; only results
        # flagged reliable are kept.
        if (tt.isValue("ПРИ", None) or len(internals_) > 0):
            mt1 = MeasureToken.tryParse(tt.next0_, add_units, False, False)
            if (mt1 is not None and mt1.reliable):
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        # A number written in words may be part of the name.
        if ((isinstance(tt, NumberToken)) and (tt).typ == NumberSpellingType.WORDS):
            npt3 = NounPhraseHelper.tryParse(
                tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0)
            if (npt3 is not None):
                tt = npt3.end_token
                t1 = tt
                if (len(internals_) == 0):
                    name_.end_token = t1
                continue
        # A number with units starts here: the name part is finished.
        mt0 = NumbersWithUnitToken.tryParse(tt, add_units, False, False)
        if (mt0 is not None):
            break
        # ", <something>" or "( <something>": either a _tryParseWHL match or
        # a unit list written before the value.
        if (((tt.is_comma or tt.isChar('('))) and tt.next0_ is not None):
            www = NumbersWithUnitToken._tryParseWHL(tt.next0_)
            if (www is not None):
                whd = www
                tt = www.end_token
                t = tt
                t1 = t
                if (tt.next0_ is not None and tt.next0_.is_comma):
                    tt = tt.next0_
                    t1 = tt
                if (tt.next0_ is not None and tt.next0_.isChar(')')):
                    tt = tt.next0_
                    t1 = tt
                    continue
            uu = UnitToken.tryParseList(tt.next0_, add_units, False)
            if (uu is not None):
                t = uu[len(uu) - 1].end_token
                t1 = t
                units = uu
                if (tt.isChar('(') and t1.next0_ is not None and t1.next0_.isChar(')')):
                    # "(units)" - step over the closing bracket.
                    tt = t1.next0_
                    t = tt
                    t1 = t
                    continue
                elif (t1.next0_ is not None and t1.next0_.isChar('(')):
                    # "units (other units)" - a second bracketed unit list.
                    uu = UnitToken.tryParseList(t1.next0_.next0_, add_units, False)
                    if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.isChar(')')):
                        units2 = uu
                        tt = uu[len(uu) - 1].end_token.next0_
                        t = tt
                        t1 = t
                        continue
                # uu may have been reassigned (possibly to None) just above.
                if (uu is not None and len(uu) > 0 and not uu[0].is_doubt):
                    break
        # Any bracketed sequence is skipped as a whole.
        if (BracketHelper.canBeStartOfSequence(tt, False, False)):
            br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100)
            if (br is not None):
                tt = br.end_token
                t = tt
                t1 = t
                continue
        # "НЕ" ("not") followed by an adverb or a "misc" word ends the scan;
        # otherwise the particle is skipped.
        if (tt.isValue("НЕ", None) and tt.next0_ is not None):
            mc = tt.next0_.getMorphClassInDictionary()
            if (mc.is_adverb or mc.is_misc):
                break
            continue
        # NOTE(review): no-op check - looks like a leftover debugging hook.
        if (tt.isValue("ЯМЗ", None)):
            pass
        npt2 = NounPhraseHelper.tryParse(
            tt,
            Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr),
            0)
        if (npt2 is None):
            if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction):
                # A preposition/conjunction that matches M_TERMINS ends the
                # scan unless a letters-only text token follows the match.
                to = NumbersWithUnitToken.M_TERMINS.tryParse(
                    tt, TerminParseAttr.NO)
                if (to is not None):
                    if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters):
                        pass
                    else:
                        break
                t1 = tt
                continue
            mc = tt.getMorphClassInDictionary()
            if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))):
                # Such a word is taken as units when it parses as a unit
                # list that is long enough; otherwise it extends the name.
                uu = UnitToken.tryParseList(tt, add_units, False)
                if (uu is not None):
                    if (uu[0].length_char > 2 or len(uu) > 1):
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
                t = tt
                t1 = t
                if (len(internals_) == 0):
                    name_.end_token = tt
                continue
            if (tt.is_comma):
                continue
            if (tt.isChar('.')):
                # A dot that cannot start a new sentence is skipped.
                if (not MiscHelper.canBeStartOfSentence(tt.next0_)):
                    continue
                uu = UnitToken.tryParseList(tt.next0_, add_units, False)
                if (uu is not None):
                    if (uu[0].length_char > 2 or len(uu) > 1):
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
            # Anything else that is not a noun phrase ends the scan.
            break
        # A further noun phrase: step over it and, unless nested measures
        # were collected or it ends in "limit" / "boundary" / "range",
        # extend the name to cover it.
        tt = npt2.end_token
        t = tt
        t1 = t
        if (len(internals_) > 0):
            pass
        elif (t.isValue("ПРЕДЕЛ", None) or t.isValue("ГРАНИЦА", None) or t.isValue("ДИАПАЗОН", None)):
            pass
        elif (t.chars.is_letter):
            name_.end_token = t1
    # --- Skip delimiters between the name part and the value ---------------
    t1 = t1.next0_
    first_pass3038 = True
    while True:
        if first_pass3038:
            first_pass3038 = False
        else:
            t1 = t1.next0_
        if (not (t1 is not None)):
            break
        if (t1.is_table_control_char):
            pass
        elif (t1.isCharOf(":,_")):
            www = NumbersWithUnitToken._tryParseWHL(t1.next0_)
            if (www is not None):
                whd = www
                t = www.end_token
                t1 = t
                continue
        elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before):
            pass
        else:
            break
    if (t1 is None):
        return None
    # --- Value(s) -----------------------------------------------------------
    mts = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, not0_)
    if (mts is None):
        return None
    mt = mts[0]
    # A leading preposition is not part of the name.
    if (name_.begin_token.morph.class0_.is_preposition):
        name_.begin_token = name_.begin_token.next0_
    if (len(mts) > 1 and len(internals_) == 0):
        # Several values in a row: build a token whose internals hold one
        # MeasureToken per value.
        if (len(mt.units) == 0):
            if (units is not None):
                for m in mts:
                    m.units = units
        res1 = MeasureToken._new1511(t0, mts[len(mts) - 1].end_token, name_.morph, True)
        res1.name = MiscHelper.getTextValueOfMetaToken(
            name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
        k = 0
        while k < len(mts):
            ttt = MeasureToken._new1506(mts[k].begin_token, mts[k].end_token, mts[k])
            if (whd is not None):
                # whd.tag is read as a list of per-value names.
                # NOTE(review): if asObjectOrNull can return None here,
                # len(nams) raises TypeError - confirm whd.tag is always a list.
                nams = Utils.asObjectOrNull(whd.tag, list)
                if (k < len(nams)):
                    ttt.name = nams[k]
            res1.internals.append(ttt)
            k += 1
        # An optional trailing "± N%" applies to the whole group.
        tt1 = res1.end_token.next0_
        if (tt1 is not None and tt1.isChar('±')):
            nn = NumbersWithUnitToken._tryParse(tt1, add_units, True, False)
            if (nn is not None and nn.plus_minus_percent):
                res1.end_token = nn.end_token
                res1.nums = nn
        return res1
    # A value glued to the previous token is accepted only after ':' ')' ','
    # or a table control character.
    if (not mt.is_whitespace_before):
        if (mt.begin_token.previous is None):
            return None
        if (mt.begin_token.previous.isCharOf(":),") or mt.begin_token.previous.is_table_control_char):
            pass
        else:
            return None
    if (len(mt.units) == 0 and units is not None):
        mt.units = units
        if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0):
            # From the first unit with power -1 onward, move the units to
            # the divisor (flipping the sign of their power) and cut them
            # off mt.units, which is the same list object as units here.
            i = 1
            while i < len(units):
                if (units[i].pow0_ == -1):
                    j = i
                    while j < len(units):
                        mt.div_num.units.append(units[j])
                        units[j].pow0_ = (-units[j].pow0_)
                        j += 1
                    del mt.units[i:i + len(units) - i]
                    break
                i += 1
    # A min/max marker seen earlier turns a single value into a bound.
    if ((minmax < 0) and mt.single_val is not None):
        mt.from_val = mt.single_val
        mt.from_include = True
        mt.single_val = (None)
    if (minmax > 0 and mt.single_val is not None):
        mt.to_val = mt.single_val
        mt.to_include = True
        mt.single_val = (None)
    if (len(mt.units) == 0):
        # Last attempt to find units right after the value.
        units = UnitToken.tryParseList(mt.end_token.next0_, add_units, True)
        if (units is None):
            if (can_units_absent):
                pass
            else:
                return None
        else:
            mt.units = units
    res = MeasureToken._new1513(t0, mt.end_token, name_.morph, internals_)
    # "<text>-<name>" written without spaces: pull the token before the
    # hyphen into the name and into the result span.
    if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))):
        name_.begin_token = res.begin_token = name_.begin_token.previous.previous
    res.name = MiscHelper.getTextValueOfMetaToken(
        name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
    res.nums = mt
    # The result is marked reliable when some unit has a keyword that does
    # not start before the result itself.
    for u in res.nums.units:
        if (u.keyword is not None):
            if (u.keyword.begin_char >= res.begin_char):
                res.reliable = True
    res.__parseInternals(add_units)
    if (len(res.internals) > 0 or not can_be_set):
        return res
    # A single following value in a different unit turns the result into a
    # set (is_set) holding both values as internals.
    t1 = res.end_token.next0_
    if (t1 is not None and t1.is_comma_and):
        t1 = t1.next0_
    mts1 = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, False)
    if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.canBeEquals(mts[0].units, mts1[0].units)):
        res.is_set = True
        res.nums = (None)
        res.internals.append(
            MeasureToken._new1506(mt.begin_token, mt.end_token, mt))
        res.internals.append(
            MeasureToken._new1506(mts1[0].begin_token, mts1[0].end_token, mts1[0]))
        res.end_token = mts1[0].end_token
    return res
def _tryParse(t: 'Token', add_units: 'TerminCollection', second: bool, can_omit_number: bool) -> 'NumbersWithUnitToken':
    """Parse one numeric value (or range) with optional units at ``t``.

    Handles, in order: leading "and"-commas and "НО" ("but"), an optional
    min/max marker, an "about" marker, a bracketed value, a range keyword
    from ``M_TERMINS`` or a comparison sign, sign/separator characters, the
    number itself (units may stand before or after it), and finally an
    optional second value that turns the result into a range or supplies a
    plus-minus tolerance or a divisor.

    Args:
        t: token to start from; None yields None.
        add_units: extra unit collection, passed through to the unit parser.
        second: True when this call parses the second value after a first
            one; it enables the separator characters handled below and
            prevents a further recursive look-ahead.
        can_omit_number: when True, a single one-token unit longer than 3
            characters is accepted without a number and gets the value 1.

    Returns:
        The parsed ``NumbersWithUnitToken``, or None when nothing fits.

    NOTE(review): the signature has no ``self``; presumably this is declared
    as a static method outside the visible text - confirm.
    """
    if (t is None):
        return None
    # Skip leading "and"-commas and the word "НО" ("but").
    while t is not None:
        if (t.is_comma_and or t.isValue("НО", None)):
            t = t.next0_
        else:
            break
    t0 = t
    about_ = False
    # NOTE(review): min_max is filled in here but never read afterwards.
    min_max = 0
    wrapmin_max1523 = RefOutArgWrapper(min_max)
    ttt = NumbersWithUnitToken._isMinOrMax(t, wrapmin_max1523)
    min_max = wrapmin_max1523.value
    if (ttt is not None):
        t = ttt.next0_
        if (t is None):
            return None
    if (t is None):
        return None
    # "~", "ОКОЛО" ("about"), "ПРИМЕРНО" ("approximately").
    if (t.isChar('~') or t.isValue("ОКОЛО", None) or t.isValue("ПРИМЕРНО", None)):
        t = t.next0_
        about_ = True
        if (t is None):
            return None
    # "( value )", optionally followed by units after the closing bracket.
    if (t0.isChar('(')):
        mt0 = NumbersWithUnitToken._tryParse(t.next0_, add_units, False, False)
        if (mt0 is not None and mt0.end_token.next0_ is not None and mt0.end_token.next0_.isChar(')')):
            if (second):
                # As a second value only a symmetric range (from == -to)
                # is accepted in brackets.
                if (mt0.from_val is not None and mt0.to_val is not None and mt0.from_val == (-mt0.to_val)):
                    pass
                else:
                    return None
            mt0.begin_token = t0
            mt0.end_token = mt0.end_token.next0_
            uu = UnitToken.tryParseList(mt0.end_token.next0_, add_units, False)
            if (uu is not None and len(mt0.units) == 0):
                mt0.units = uu
                mt0.end_token = uu[len(uu) - 1].end_token
            return mt0
    plusminus = False
    unit_before = False
    dty = NumbersWithUnitToken.DiapTyp.UNDEFINED
    uni = None
    # --- Range kind: a keyword from M_TERMINS or a comparison sign ----------
    tok = NumbersWithUnitToken.M_TERMINS.tryParse(t, TerminParseAttr.NO)
    if (tok is not None):
        t = tok.end_token.next0_
        dty = (Utils.valToEnum(tok.termin.tag, NumbersWithUnitToken.DiapTyp))
        if (not tok.is_whitespace_after):
            # A keyword glued to the next token is accepted only before ':',
            # a number, or ", ЧЕМ" ("than") with an optional preposition.
            if (t is None):
                return None
            if (t.isCharOf(":")):
                pass
            elif (isinstance(t, NumberToken)):
                pass
            elif (t.is_comma and t.next0_ is not None and t.next0_.isValue("ЧЕМ", None)):
                t = t.next0_.next0_
                if (t is not None and t.morph.class0_.is_preposition):
                    t = t.next0_
            else:
                return None
        if (t is not None and t.isChar('(')):
            # "keyword (units) value": units are given in brackets first.
            uni = UnitToken.tryParseList(t.next0_, add_units, False)
            if (uni is not None):
                t = uni[len(uni) - 1].end_token.next0_
                while t is not None:
                    if (t.isCharOf("):")):
                        t = t.next0_
                    else:
                        break
                mt0 = NumbersWithUnitToken._tryParse(
                    t, add_units, False, can_omit_number)
                if (mt0 is not None and len(mt0.units) == 0):
                    mt0.begin_token = t0
                    mt0.units = uni
                    return mt0
    elif (t.isChar('<')):
        dty = NumbersWithUnitToken.DiapTyp.LS
        t = t.next0_
        if (t is not None and t.isChar('=')):
            t = t.next0_
            dty = NumbersWithUnitToken.DiapTyp.LE
    elif (t.isChar('>')):
        dty = NumbersWithUnitToken.DiapTyp.GT
        t = t.next0_
        if (t is not None and t.isChar('=')):
            t = t.next0_
            dty = NumbersWithUnitToken.DiapTyp.GE
    elif (t.isChar('≤')):
        dty = NumbersWithUnitToken.DiapTyp.LE
        t = t.next0_
    elif (t.isChar('≥')):
        dty = NumbersWithUnitToken.DiapTyp.GE
        t = t.next0_
    if (t is not None and t.isChar(':')):
        t = t.next0_
    # --- Sign and separator characters in front of the number ---------------
    if (t is not None):
        if (t.isChar('+') or t.isValue("ПЛЮС", None)):
            # '+' followed directly by '-' (or by '/' then '-') is read as
            # plus-minus.
            t = t.next0_
            if (t is not None and not t.is_whitespace_before):
                if (t.is_hiphen):
                    t = t.next0_
                    plusminus = True
                elif ((t.isCharOf("\\/") and t.next0_ is not None and not t.is_newline_after) and t.next0_.is_hiphen):
                    t = t.next0_.next0_
                    plusminus = True
        elif (second and ((t.isCharOf("\\/÷…~")))):
            # Separator between the first and the second value.
            t = t.next0_
        elif ((t.is_hiphen and t == t0 and not second) and NumbersWithUnitToken.M_TERMINS.tryParse(
                t.next0_, TerminParseAttr.NO) is not None):
            # "- keyword ...": a leading hyphen followed by a range keyword.
            tok = NumbersWithUnitToken.M_TERMINS.tryParse(
                t.next0_, TerminParseAttr.NO)
            t = tok.end_token.next0_
            dty = (Utils.valToEnum(tok.termin.tag, NumbersWithUnitToken.DiapTyp))
        elif (t.is_hiphen and t == t0 and ((t.is_whitespace_after or second))):
            # A leading hyphen acting as a dash, not as a minus sign.
            t = t.next0_
        elif (t.isChar('±')):
            t = t.next0_
            plusminus = True
        elif ((second and t.isChar('.') and t.next0_ is not None) and t.next0_.isChar('.')):
            # ".." or "..." between two values.
            t = t.next0_.next0_
            if (t is not None and t.isChar('.')):
                t = t.next0_
    if (t is None):
        return None
    # --- The number itself; units may stand in front of it ------------------
    num = NumberHelper.tryParseRealNumber(t, True)
    if (num is None):
        uni = UnitToken.tryParseList(t, add_units, False)
        if (uni is not None):
            unit_before = True
            t = uni[len(uni) - 1].end_token.next0_
            delim = False
            while t is not None:
                if (t.isCharOf(":,")):
                    delim = True
                    t = t.next0_
                else:
                    break
            if (not delim):
                # Without ':' or ',' the units must be followed by
                # whitespace; a spaced hyphen also counts as a delimiter.
                if (t is None or not t.is_whitespace_before):
                    return None
                if (t.next0_ is not None and t.is_hiphen and t.is_whitespace_after):
                    delim = True
                    t = t.next0_
            num = NumberHelper.tryParseRealNumber(t, True)
    res = None
    rval = 0
    if (num is None):
        # No number: try an M_SPEC term, whose tag supplies the value and
        # whose tag2 is matched against the units' fullname_cyr.
        tt = NumbersWithUnitToken.M_SPEC.tryParse(t, TerminParseAttr.NO)
        if (tt is not None):
            rval = (tt.termin.tag)
            unam = tt.termin.tag2
            for u in UnitsHelper.UNITS:
                if (u.fullname_cyr == unam):
                    uni = list()
                    uni.append(UnitToken._new1517(t, t, u))
                    break
            if (uni is None):
                return None
            res = NumbersWithUnitToken._new1519(t0, tt.end_token, about_)
            t = tt.end_token.next0_
        else:
            if (not can_omit_number):
                return None
            # A bare single-token unit longer than 3 characters means 1.
            if ((uni is not None and len(uni) == 1 and uni[0].begin_token == uni[0].end_token) and uni[0].length_char > 3):
                rval = (1)
                res = NumbersWithUnitToken._new1519(
                    t0, uni[len(uni) - 1].end_token, about_)
                t = res.end_token.next0_
            else:
                return None
    else:
        # A hyphen glued on both sides to a negative number is rejected.
        if ((t == t0 and t0.is_hiphen and not t.is_whitespace_before) and not t.is_whitespace_after and (num.real_value < 0)):
            return None
        t = num.end_token.next0_
        res = NumbersWithUnitToken._new1519(t0, num.end_token, about_)
        rval = num.real_value
    # --- Units after the number ----------------------------------------------
    if (uni is None):
        uni = UnitToken.tryParseList(t, add_units, False)
        if (uni is not None):
            if ((plusminus and second and len(uni) == 1) and uni[0].unit == UnitsHelper.UPERCENT):
                # "± N %": a percent tolerance; real units may follow the
                # percent sign.
                res.end_token = uni[len(uni) - 1].end_token
                res.plus_minus_percent = True
                tt1 = uni[0].end_token.next0_
                uni = UnitToken.tryParseList(tt1, add_units, False)
                if (uni is not None):
                    res.units = uni
                    res.end_token = uni[len(uni) - 1].end_token
            else:
                res.units = uni
                res.end_token = uni[len(uni) - 1].end_token
            t = res.end_token.next0_
    else:
        # Units were found earlier (before the number or via M_SPEC).
        res.units = uni
        if (len(uni) > 1):
            # If a shorter unit list with the same first unit follows and is
            # itself followed by a slash, try to read "units / number units"
            # and store the part after the slash as a divisor.
            uni1 = UnitToken.tryParseList(t, add_units, False)
            if (((uni1 is not None and uni1[0].unit == uni[0].unit and (len(uni1) < len(uni))) and uni[len(uni1)].pow0_ == -1 and uni1[len(uni1) - 1].end_token.next0_ is not None) and uni1[len(uni1) - 1].end_token.next0_.isCharOf("/\\")):
                num2 = NumbersWithUnitToken._tryParse(
                    uni1[len(uni1) - 1].end_token.next0_.next0_, add_units, False, False)
                if (num2 is not None and num2.units is not None and num2.units[0].unit == uni[len(uni1)].unit):
                    res.units = uni1
                    res.div_num = num2
                    res.end_token = num2.end_token
    # --- Apply the range kind --------------------------------------------------
    if (dty != NumbersWithUnitToken.DiapTyp.UNDEFINED):
        if (dty == NumbersWithUnitToken.DiapTyp.GE or dty == NumbersWithUnitToken.DiapTyp.FROM):
            res.from_include = True
            res.from_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.GT):
            res.from_include = False
            res.from_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.LE or dty == NumbersWithUnitToken.DiapTyp.TO):
            res.to_include = True
            res.to_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.LS):
            res.to_include = False
            res.to_val = rval
    # NOTE(review): is_second_max is set below but never read afterwards.
    is_second_max = False
    if (not second):
        iii = 0
        wrapiii1522 = RefOutArgWrapper(iii)
        ttt = NumbersWithUnitToken._isMinOrMax(t, wrapiii1522)
        iii = wrapiii1522.value
        if (ttt is not None and iii > 0):
            is_second_max = True
            t = ttt.next0_
    # --- Optional second value ---------------------------------------------------
    # No look-ahead when this call is already the second value, after a
    # plus-minus sign, or across a line break.
    next0__ = (None if second or plusminus or ((t is not None and t.is_newline_before)) else NumbersWithUnitToken._tryParse(t, add_units, True, False))
    if (next0__ is not None and ((next0__.to_val is not None or next0__.single_val is not None)) and next0__.from_val is None):
        # The second value must agree on units with the first one.
        if (len(next0__.units) > 0):
            if (len(res.units) == 0):
                res.units = next0__.units
            elif (not UnitToken.canBeEquals(res.units, next0__.units)):
                next0__ = (None)
        elif (len(res.units) > 0 and not unit_before and not next0__.plus_minus_percent):
            next0__ = (None)
        if (next0__ is not None):
            res.end_token = next0__.end_token
        if (next0__ is not None and next0__.to_val is not None):
            res.to_val = next0__.to_val
            res.to_include = next0__.to_include
        elif (next0__ is not None and next0__.single_val is not None):
            if (next0__.begin_token.isCharOf("/\\")):
                # "value / value": the second value is a divisor.
                res.div_num = next0__
                res.single_val = rval
                return res
            elif (next0__.plus_minus_percent):
                res.single_val = rval
                res.plus_minus = next0__.single_val
                res.plus_minus_percent = True
                res.to_include = True
            else:
                res.to_val = next0__.single_val
                res.to_include = True
        if (next0__ is not None):
            if (res.from_val is None):
                res.from_val = rval
                res.from_include = True
            return res
    elif ((next0__ is not None and next0__.from_val is not None and next0__.to_val is not None) and next0__.to_val == (-next0__.from_val)):
        # The second value is a symmetric range: a tolerance around rval.
        if (len(next0__.units) == 1 and next0__.units[0].unit == UnitsHelper.UPERCENT and len(res.units) > 0):
            res.single_val = rval
            res.plus_minus = next0__.to_val
            res.plus_minus_percent = True
            res.end_token = next0__.end_token
            return res
        if (len(next0__.units) == 0):
            res.single_val = rval
            res.plus_minus = next0__.to_val
            res.end_token = next0__.end_token
            return res
        # The tolerance carries its own units: expand to an explicit range.
        res.from_val = (next0__.from_val + rval)
        res.from_include = True
        res.to_val = (next0__.to_val + rval)
        res.to_include = True
        res.end_token = next0__.end_token
        if (len(next0__.units) > 0):
            res.units = next0__.units
        return res
    # --- Single value -----------------------------------------------------------
    if (dty == NumbersWithUnitToken.DiapTyp.UNDEFINED):
        if (plusminus and ((not res.plus_minus_percent or not second))):
            # "±N" on its own is the range [-N, N].
            res.from_include = True
            res.from_val = (-rval)
            res.to_include = True
            res.to_val = rval
        else:
            res.single_val = rval
            res.plus_minus_percent = plusminus
    return res