def parse_an_unit(self, source: str) -> DateTimeResolutionResult:
    """Resolve a duration matched by the "an unit" or "half unit" patterns.

    ``source`` is searched with ``config.an_unit_regex`` first and, if that
    misses, with ``config.half_date_unit_regex``.  The base quantity is 0.5
    when the match's ``half`` group is non-empty and 1 otherwise; the value
    returned by ``parse_number_with_unit_and_suffix(source)`` is added to it.

    Returns a default (unsuccessful) ``DateTimeResolutionResult`` when neither
    pattern matches or when the matched ``unit`` group is not a key of
    ``config.unit_map``.  Otherwise ``timex``, ``future_value``,
    ``past_value`` and ``success`` are populated.
    """
    result = DateTimeResolutionResult()

    match = regex.search(self.config.an_unit_regex, source)
    if match is None:
        # Fall back to the half-unit pattern; give up if that misses as well.
        match = regex.search(self.config.half_date_unit_regex, source)
        if match is None:
            return result

    base_amount = 0.5 if match.group('half') else 1
    extra_amount = self.parse_number_with_unit_and_suffix(source)
    num = base_amount + extra_amount

    source_unit = match.group('unit') or ''
    if source_unit not in self.config.unit_map:
        return result

    num = FormatUtility.float_or_int(num)
    unit = self.config.unit_map[source_unit]

    # A 'T' is inserted after 'P' for units that is_less_than_day() accepts.
    is_time = ''
    if self.is_less_than_day(unit):
        is_time = 'T'
    result.timex = f'P{is_time}{num}{unit[0]}'

    total = FormatUtility.float_or_int(
        num * self.config.unit_value_map[source_unit])
    result.future_value = total
    result.past_value = result.future_value
    result.success = True
    return result
def parse_in_exact_number_unit(self, source: str) -> DateTimeResolutionResult:
    """Resolve a duration whose quantity is an inexact word.

    ``source`` is searched with ``config.inexact_number_unit_regex``.  The
    quantity is fixed at 3 for now (stand-in for words such as "few" or
    "some").

    Returns a default (unsuccessful) ``DateTimeResolutionResult`` when the
    pattern does not match or the matched ``unit`` group is not a key of
    ``config.unit_map``.  Otherwise ``timex``, ``future_value``,
    ``past_value`` and ``success`` are populated.
    """
    result = DateTimeResolutionResult()

    match = regex.search(self.config.inexact_number_unit_regex, source)
    if match is None:
        return result

    # Inexact quantities ("few", "some") are pinned to 3 for now.
    num = float(3)

    source_unit = match.group('unit') or ''
    if source_unit not in self.config.unit_map:
        return result
    unit = self.config.unit_map[source_unit]

    # Same large-quantity guard as the numeric parsers; with num fixed at 3
    # it cannot trigger here.
    if unit in ['Y', 'MON', 'W'] and num > 1000:
        return result

    num = FormatUtility.float_or_int(num)

    # A 'T' is inserted after 'P' for units that is_less_than_day() accepts.
    is_time = ''
    if self.is_less_than_day(unit):
        is_time = 'T'
    result.timex = f'P{is_time}{num}{unit[0]}'

    total = FormatUtility.float_or_int(
        num * self.config.unit_value_map[source_unit])
    result.future_value = total
    result.past_value = result.future_value
    result.success = True
    return result
def parse_number_combined_unit(self, source: str) -> DateTimeResolutionResult:
    """Resolve a duration where the number and the unit are matched together.

    ``source`` is searched with ``config.number_combined_with_unit``.  The
    quantity is the float value of the ``num`` group plus whatever
    ``parse_number_with_unit_and_suffix(source)`` returns.

    Returns a default (unsuccessful) ``DateTimeResolutionResult`` when the
    pattern does not match, when the ``unit`` group is not a key of
    ``config.unit_map``, or when the quantity exceeds 1000 for a unit mapped
    to 'Y', 'MON' or 'W'.  Otherwise ``timex``, ``future_value``,
    ``past_value`` and ``success`` are populated.
    """
    result = DateTimeResolutionResult()

    match = regex.search(self.config.number_combined_with_unit, source)
    if match is None:
        return result

    matched_number = float(match.group('num'))
    extra_amount = self.parse_number_with_unit_and_suffix(source)
    num = matched_number + extra_amount

    source_unit = match.group('unit') or ''
    if source_unit not in self.config.unit_map:
        return result
    unit = self.config.unit_map[source_unit]

    # Reject quantities above 1000 for year/month/week units.
    if unit in ['Y', 'MON', 'W'] and num > 1000:
        return result

    num = FormatUtility.float_or_int(num)

    # A 'T' is inserted after 'P' for units that is_less_than_day() accepts.
    is_time = ''
    if self.is_less_than_day(unit):
        is_time = 'T'
    result.timex = f'P{is_time}{num}{unit[0]}'

    total = FormatUtility.float_or_int(
        num * self.config.unit_value_map[source_unit])
    result.future_value = total
    result.past_value = result.future_value
    result.success = True
    return result
def parse_number_space_unit(self, source: str) -> DateTimeResolutionResult:
    """Resolve a duration where a cardinal number is followed by a unit.

    Exactly one result from ``config.cardinal_extractor`` is required.  The
    text after the parsed number (stripped and lower-cased) is searched with
    ``config.followed_unit`` to obtain the ``suffix`` and ``unit`` groups.
    The quantity is the parsed number plus whatever
    ``parse_number_with_unit_and_suffix(suffix)`` returns.

    Returns a default (unsuccessful) ``DateTimeResolutionResult`` when the
    extractor does not yield exactly one number or when the unit is not a key
    of ``config.unit_map``.  Otherwise ``timex``, ``future_value``,
    ``past_value`` and ``success`` are populated.
    """
    result = DateTimeResolutionResult()

    ers = self.config.cardinal_extractor.extract(source)
    if len(ers) != 1:
        return result

    pr = self.config.number_parser.parse(ers[0])

    # Everything after the number is where the unit is expected.
    no_num = source[pr.start + pr.length:].strip().lower()
    match = regex.search(self.config.followed_unit, no_num)
    if match is None:
        suffix = source
        source_unit = ''
    else:
        suffix = RegExpUtility.get_group(match, 'suffix')
        source_unit = RegExpUtility.get_group(match, 'unit')

    if source_unit not in self.config.unit_map:
        return result

    parsed_number = float(pr.value)
    num = parsed_number + self.parse_number_with_unit_and_suffix(suffix)
    unit = self.config.unit_map[source_unit]
    num = FormatUtility.float_or_int(num)

    # A 'T' is inserted after 'P' for units that is_less_than_day() accepts.
    is_time = ''
    if self.is_less_than_day(unit):
        is_time = 'T'
    result.timex = f'P{is_time}{num}{unit[0]}'

    total = FormatUtility.float_or_int(
        num * self.config.unit_value_map[source_unit])
    result.future_value = total
    result.past_value = result.future_value
    result.success = True
    return result
def parse(self, query: str) -> List[ModelResult]:
    """Run every extractor/parser pair over ``query`` and collect the results.

    Each item of ``self.extractor_parser`` supplies an ``extractor`` and a
    ``parser``.  Parse results whose ``value`` is ``None`` are dropped.  When
    several results cover the same (start, end) span, only the first one
    encountered is kept.

    Args:
        query: Raw input text; it is passed through
            ``FormatUtility.preprocess(query, False)`` before extraction.

    Returns:
        A list of ``ModelResult`` objects.  ``start`` and ``end`` are
        inclusive character offsets, so ``end`` is
        ``start + length - 1``.
    """
    query = FormatUtility.preprocess(query, False)
    # TODO: for Chinese characters

    model_results = []
    # Spans already emitted; gives O(1) duplicate detection instead of
    # rescanning the whole result list for every candidate.
    seen_spans = set()

    for item in self.extractor_parser:
        extract_results = item.extractor.extract(query)
        parse_results = [item.parser.parse(er) for er in extract_results]

        for parse_result in parse_results:
            if parse_result.value is None:
                continue

            start = parse_result.start
            # Inclusive end offset: start + length - 1.  (Adding
            # ``parse_result.end`` to ``start`` would count the start
            # offset twice.)
            end = parse_result.start + parse_result.length - 1

            span = (start, end)
            if span in seen_spans:
                continue
            seen_spans.add(span)

            model_result = ModelResult()
            model_result.start = start
            model_result.end = end
            model_result.text = parse_result.text
            model_result.type_name = self.model_type_name
            model_result.resolution = self.get_resolution(parse_result.value)
            model_results.append(model_result)

    return model_results
def get_result_from_regex(self, pattern: Pattern, source: str,
                          num: float) -> DateTimeResolutionResult:
    """Build a duration result for ``num`` units, taking the unit from a match.

    ``source`` is searched with ``pattern``; the match's ``unit`` group
    selects the unit.

    Returns a default (unsuccessful) ``DateTimeResolutionResult`` when
    ``pattern`` does not match or the unit is not a key of
    ``config.unit_map``.  Otherwise ``timex``, ``future_value``,
    ``past_value`` and ``success`` are populated.
    """
    result = DateTimeResolutionResult()

    match = regex.search(pattern, source)
    if match is None:
        return result

    source_unit = match.group('unit') or ''
    if source_unit not in self.config.unit_map:
        return result

    num = FormatUtility.float_or_int(num)
    unit = self.config.unit_map[source_unit]

    # A 'T' is inserted after 'P' for units that is_less_than_day() accepts.
    is_time = ''
    if self.is_less_than_day(unit):
        is_time = 'T'
    result.timex = f'P{is_time}{num}{unit[0]}'

    total = FormatUtility.float_or_int(
        num * self.config.unit_value_map[source_unit])
    result.future_value = total
    result.past_value = result.future_value
    result.success = True
    return result
def parse(self, query: str, reference: datetime = None) -> List[ModelResult]:  #pylint: disable=W0221
    """Extract and parse date/time entities from ``query``.

    The query is preprocessed with ``FormatUtility.preprocess``, run through
    ``self.extractor`` and each extraction is handed to ``self.parser``.
    When a parse result's ``value`` is a list, its elements are used in place
    of the parse result itself; otherwise the parse result is used directly.

    Args:
        query: Input text.
        reference: Reference datetime forwarded to the extractor and parser.

    Returns:
        One ``ModelResult`` per collected entry, in extraction order.
    """
    query = FormatUtility.preprocess(query)

    collected = []
    for extracted in self.extractor.extract(query, reference):
        parsed = self.parser.parse(extracted, reference)
        if isinstance(parsed.value, list):
            # A list value is flattened into the output.
            collected.extend(parsed.value)
        else:
            collected.append(parsed)

    return [self.__to_model_result(entry) for entry in collected]
def merge_two_time_points(self, source: str,
                          reference: datetime) -> DateTimeResolutionResult:
    """Resolve a time period described by two time points in ``source``.

    Two time extractions are required.  When only one is found, an integer
    from ``config.integer_extractor`` is promoted to a time if the text
    between it and the extracted time matches ``config.till_regex``.

    Both points are parsed with ``config.time_parser``.  A point whose
    comment ends with 'ampm' may be shifted by 12 hours (see the inline
    comments), and an end that is still before the begin is moved to the
    next day.

    Returns a default (unsuccessful) ``DateTimeResolutionResult`` when two
    time points cannot be established or either fails to parse.  Otherwise
    ``timex``, ``future_value``, ``past_value``, ``success`` and
    ``sub_date_time_entities`` are populated, plus ``comment`` when both
    points are 'ampm'.
    """
    result = DateTimeResolutionResult()
    ers = self.config.time_extractor.extract(source, reference)

    # NOTE: this flag starts True and is only ever re-assigned True below, so
    # the early return guarded by it never fires; the length check after the
    # block is what rejects unusable input.
    valid_time_number = True

    if len(ers) != 2:
        if len(ers) == 1:
            time_er = ers[0]
            for num in self.config.integer_extractor.extract(source):
                time_er_end = time_er.start + time_er.length
                if num.start > time_er_end:
                    # Number comes after the time: gap runs time -> number.
                    middle_begin = time_er_end
                    middle_length = num.start - middle_begin
                elif num.start + num.length < time_er.start:
                    # Number comes before the time: gap runs number -> time.
                    middle_begin = num.start + num.length
                    middle_length = time_er.start - middle_begin
                else:
                    middle_begin = 0
                    middle_length = 0

                # The gap between the time and the number must be a
                # connector ("till") string.
                middle_str = source[middle_begin:middle_begin + middle_length]
                if regex.search(self.config.till_regex, middle_str) is None:
                    continue

                num.type = Constants.SYS_DATETIME_TIME
                ers.append(num)
                valid_time_number = True
                break

            ers = sorted(ers, key=lambda er: er.start)

        if not valid_time_number:
            return result

    if len(ers) != 2:
        return result

    pr1 = self.config.time_parser.parse(ers[0], reference)
    pr2 = self.config.time_parser.parse(ers[1], reference)
    if pr1.value is None or pr2.value is None:
        return result

    ampm_str1 = pr1.value.comment
    ampm_str2 = pr2.value.comment
    begin_is_ampm = bool(ampm_str1) and ampm_str1.endswith('ampm')
    end_is_ampm = bool(ampm_str2) and ampm_str2.endswith('ampm')

    begin_time = pr1.value.future_value
    end_time = pr2.value.future_value
    half_day = timedelta(hours=12)

    # An 'ampm' end that is not after the begin, but would be once shifted by
    # 12 hours, is shifted.
    if (end_is_ampm and end_time <= begin_time
            and end_time + half_day > begin_time):
        end_time = end_time + half_day
        pr2.value.future_value = end_time
        pr2.timex_str = f'T{end_time.hour}'
        if end_time.minute > 0:
            pr2.timex_str = f'{pr2.timex_str}:{end_time.minute}'

    # An 'ampm' begin that is more than 12 hours before the end is shifted
    # 12 hours later.
    if begin_is_ampm and end_time > begin_time + half_day:
        begin_time = begin_time + half_day
        pr1.value.future_value = begin_time
        pr1.timex_str = f'T{begin_time.hour}'
        if begin_time.minute > 0:
            pr1.timex_str = f'{pr1.timex_str}:{begin_time.minute}'

    # An end still before the begin is moved to the next day.
    if end_time < begin_time:
        end_time = end_time + timedelta(days=1)

    span_seconds = (end_time - begin_time).total_seconds()
    hours = FormatUtility.float_or_int(span_seconds // 3600)
    minutes = FormatUtility.float_or_int(span_seconds / 60 % 60)

    hours_str = f'{hours}H' if hours > 0 else ''
    minutes_str = f'{minutes}M' if 0 < minutes < 60 else ''

    result.timex = f'({pr1.timex_str},{pr2.timex_str},PT{hours_str}{minutes_str})'
    result.future_value = ResolutionStartEnd(begin_time, end_time)
    result.past_value = ResolutionStartEnd(begin_time, end_time)
    result.success = True

    if begin_is_ampm and end_is_ampm:
        result.comment = 'ampm'

    result.sub_date_time_entities = [pr1, pr2]
    return result