Пример #1
0
    def parse_an_unit(self, source: str) -> DateTimeResolutionResult:
        """Resolve a duration matched by the "an unit" or "half unit" patterns.

        ``an_unit_regex`` is tried first; ``half_date_unit_regex`` is the
        fallback. The amount starts at 0.5 when the ``Constants.HALF`` group
        matched and at 1 otherwise, and the value returned by
        ``parse_number_with_unit_and_suffix`` is added on top.

        Args:
            source: Text to resolve.

        Returns:
            A ``DateTimeResolutionResult``. It is returned unpopulated when
            neither pattern matches or the matched unit is not a key of
            ``config.unit_map``; otherwise ``timex``, ``future_value``,
            ``past_value`` and ``success`` are set.
        """
        outcome = DateTimeResolutionResult()

        found = regex.search(self.config.an_unit_regex, source)
        if found is None:
            found = regex.search(self.config.half_date_unit_regex, source)
            if found is None:
                return outcome

        base_amount = 0.5 if found.group(Constants.HALF) else 1
        amount = base_amount + self.parse_number_with_unit_and_suffix(source)

        # An unmatched unit group is treated as the empty string.
        unit_text = found.group(Constants.UNIT) or ''
        if unit_text not in self.config.unit_map:
            return outcome

        amount = QueryProcessor.float_or_int(amount)
        unit = self.config.unit_map[unit_text]

        # Units shorter than a day get the time designator in the timex.
        if self.is_less_than_day(unit):
            time_marker = Constants.UNIT_T
        else:
            time_marker = ''

        scaled = QueryProcessor.float_or_int(
            amount * self.config.unit_value_map[unit_text])

        outcome.timex = f'P{time_marker}{amount}{unit[0]}'
        outcome.future_value = scaled
        outcome.past_value = outcome.future_value
        outcome.success = True
        return outcome
Пример #2
0
    def parse_in_exact_number_unit(self,
                                   source: str) -> DateTimeResolutionResult:
        """Resolve a duration whose quantity is inexact (e.g. "few", "some").

        The quantity is not read from the text: every match of
        ``inexact_number_unit_regex`` is resolved as 3 of the matched unit.

        Args:
            source: Text to resolve.

        Returns:
            A ``DateTimeResolutionResult``. It is returned unpopulated when
            the pattern does not match or the matched unit is not a key of
            ``config.unit_map``; otherwise ``timex``, ``future_value``,
            ``past_value`` and ``success`` are set.
        """
        result = DateTimeResolutionResult()
        match = regex.search(self.config.inexact_number_unit_regex, source)

        if match is None:
            return result

        # An unmatched unit group is treated as the empty string.
        source_unit = match.group(Constants.UNIT) or ''
        if source_unit not in self.config.unit_map:
            return result

        unit = self.config.unit_map[source_unit]

        # The inexact number ("few", "some") is fixed at 3 for now. The
        # former "num > 1000" guard for year/month/week units was
        # unreachable with a constant of 3 and has been removed.
        num = QueryProcessor.float_or_int(float(3))

        # Units shorter than a day get the time designator in the timex.
        is_time = Constants.UNIT_T if self.is_less_than_day(unit) else ''
        result.timex = f'P{is_time}{num}{unit[0]}'
        result.future_value = QueryProcessor.float_or_int(
            num * self.config.unit_value_map[source_unit])
        result.past_value = result.future_value
        result.success = True
        return result
Пример #3
0
    def parse_number_space_unit(self, source: str) -> DateTimeResolutionResult:
        """Resolve a duration written as a number, whitespace, then a unit.

        Exactly one cardinal number must be extracted from ``source``. The
        text after that number is stripped, lower-cased and matched against
        ``config.followed_unit`` to obtain the unit and an optional suffix.
        The suffix is passed to ``parse_number_with_unit_and_suffix`` and its
        return value is added to the parsed number.

        Args:
            source: Text to resolve.

        Returns:
            A ``DateTimeResolutionResult``. It is returned unpopulated unless
            exactly one number is found and the unit is a key of
            ``config.unit_map``.
        """
        outcome = DateTimeResolutionResult()

        numbers = self.config.cardinal_extractor.extract(source)
        if len(numbers) != 1:
            return outcome

        parsed = self.config.number_parser.parse(numbers[0])
        after_number = source[parsed.start + parsed.length:].strip().lower()
        unit_match = regex.search(self.config.followed_unit, after_number)

        if unit_match is None:
            # No unit follows the number: keep the whole text as the suffix
            # and leave the unit empty.
            remainder, unit_text = source, ''
        else:
            remainder = RegExpUtility.get_group(unit_match,
                                                Constants.SUFFIX_GROUP_NAME)
            unit_text = RegExpUtility.get_group(unit_match, Constants.UNIT)

        if unit_text not in self.config.unit_map:
            return outcome

        extra = self.parse_number_with_unit_and_suffix(remainder)
        amount = float(parsed.value) + extra
        unit = self.config.unit_map[unit_text]

        amount = QueryProcessor.float_or_int(amount)

        # Units shorter than a day get the 'T' designator in the timex.
        if self.is_less_than_day(unit):
            time_marker = 'T'
        else:
            time_marker = ''

        scaled = QueryProcessor.float_or_int(
            amount * self.config.unit_value_map[unit_text])

        outcome.timex = f'P{time_marker}{amount}{unit[0]}'
        outcome.future_value = scaled
        outcome.past_value = outcome.future_value
        outcome.success = True
        return outcome
Пример #4
0
    def parse_number_combined_unit(self,
                                   source: str) -> DateTimeResolutionResult:
        """Resolve a duration whose number and unit are written together.

        The ``Constants.NUM`` group of ``config.number_combined_with_unit`` is
        converted to float and the return value of
        ``parse_number_with_unit_and_suffix`` is added to it.

        Args:
            source: Text to resolve.

        Returns:
            A ``DateTimeResolutionResult``. It is returned unpopulated when
            the pattern does not match, when the unit is not a key of
            ``config.unit_map``, or when the amount exceeds 1000 for a year,
            month or week unit.
        """
        outcome = DateTimeResolutionResult()

        # Number and unit are adjacent, with no space between them.
        found = regex.search(self.config.number_combined_with_unit, source)
        if found is None:
            return outcome

        literal_amount = float(found.group(Constants.NUM))
        amount = literal_amount + self.parse_number_with_unit_and_suffix(source)

        # An unmatched unit group is treated as the empty string.
        unit_text = found.group(Constants.UNIT) or ''
        if unit_text not in self.config.unit_map:
            return outcome

        unit = self.config.unit_map[unit_text]

        # Amounts above 1000 are rejected for the coarse units.
        if amount > 1000:
            coarse_units = (Constants.UNIT_Y, Constants.UNIT_MON,
                            Constants.UNIT_W)
            if unit in coarse_units:
                return outcome

        amount = QueryProcessor.float_or_int(amount)

        # Units shorter than a day get the time designator in the timex.
        if self.is_less_than_day(unit):
            time_marker = Constants.UNIT_T
        else:
            time_marker = ''

        scaled = QueryProcessor.float_or_int(
            amount * self.config.unit_value_map[unit_text])

        outcome.timex = f'P{time_marker}{amount}{unit[0]}'
        outcome.future_value = scaled
        outcome.past_value = outcome.future_value
        outcome.success = True
        return outcome
Пример #5
0
    def parse_number_combined_unit(self, source: str) -> DateTimeResolutionResult:
        """Resolve a duration whose number and unit are written together.

        The ``num`` group of ``config.number_combined_with_unit`` is converted
        to float and the return value of ``parse_number_with_unit_and_suffix``
        is added to it.

        Args:
            source: Text to resolve.

        Returns:
            A ``DateTimeResolutionResult``. It is returned unpopulated when
            the pattern does not match, when the unit is not a key of
            ``config.unit_map``, or when the amount exceeds 1000 for a unit
            mapped to 'Y', 'MON' or 'W'.
        """
        outcome = DateTimeResolutionResult()

        found = regex.search(self.config.number_combined_with_unit, source)
        if found is None:
            return outcome

        literal_amount = float(found.group('num'))
        amount = literal_amount + self.parse_number_with_unit_and_suffix(source)

        # An unmatched unit group is treated as the empty string.
        unit_text = found.group('unit') or ''
        if unit_text not in self.config.unit_map:
            return outcome

        unit = self.config.unit_map[unit_text]

        # Amounts above 1000 are rejected for the coarse units.
        if amount > 1000 and unit in ('Y', 'MON', 'W'):
            return outcome

        amount = QueryProcessor.float_or_int(amount)

        # Units shorter than a day get the 'T' designator in the timex.
        if self.is_less_than_day(unit):
            time_marker = 'T'
        else:
            time_marker = ''

        scaled = QueryProcessor.float_or_int(
            amount * self.config.unit_value_map[unit_text])

        outcome.timex = f'P{time_marker}{amount}{unit[0]}'
        outcome.future_value = scaled
        outcome.past_value = outcome.future_value
        outcome.success = True
        return outcome
Пример #6
0
    def parse(self, query: str) -> List[ModelResult]:
        """Extract and resolve every entity in ``query``.

        Each entry of ``self.extractor_parser`` supplies an extractor and a
        parser. Parsed results without a value are dropped, and a result is
        kept only if no earlier result covers the same start/end span.

        Args:
            query: Raw input text; it is preprocessed before extraction.

        Returns:
            The resolved ``ModelResult`` objects in discovery order.
        """
        query = QueryProcessor.preprocess(query, True)

        models = []
        for pair in self.extractor_parser:
            # Parse everything this extractor found before building models.
            parsed_items = [
                pair.parser.parse(extracted)
                for extracted in pair.extractor.extract(query)
            ]

            for parsed in parsed_items:
                if parsed.value is None:
                    continue

                candidate = ModelResult()
                candidate.start = parsed.start
                # ``end`` is inclusive, hence the -1.
                candidate.end = parsed.start + parsed.length - 1
                candidate.text = parsed.text
                candidate.type_name = self.model_type_name
                candidate.resolution = self.get_resolution(parsed.value)

                already_present = any(
                    kept.start == candidate.start
                    and kept.end == candidate.end
                    for kept in models
                )
                if not already_present:
                    models.append(candidate)

        return models
Пример #7
0
    def parse(self, query: str) -> List[ModelResult]:
        """Extract and resolve every entity in ``query`` on a best-effort basis.

        Each entry of ``self.extractor_parser`` supplies an extractor and a
        parser. A parsed result whose value is a list contributes each element
        of that list; any other non-``None`` value contributes the parsed
        result itself. A result is kept only if no earlier result covers the
        same start/end span; duplicates are skipped before they are resolved.

        Any exception raised while extracting, parsing or resolving is logged
        and stops further processing; whatever was collected up to that point
        is still returned.

        Args:
            query: Raw input text; it is preprocessed before extraction.

        Returns:
            The resolved ``ModelResult`` objects in discovery order.
        """
        import logging

        query = QueryProcessor.preprocess(query, True)
        extraction_results = []
        # Spans already emitted, so the duplicate check is O(1) per result.
        seen_spans = set()

        try:
            for item in self.extractor_parser:
                # Collected per extractor/parser pair. Previously this list
                # lived outside the loop, so every pair re-processed all the
                # results of the pairs before it.
                parse_results = []
                for extracted in item.extractor.extract(query):
                    parsed = item.parser.parse(extracted)
                    if parsed.value is None:
                        continue
                    if isinstance(parsed.value, list):
                        parse_results.extend(parsed.value)
                    else:
                        parse_results.append(parsed)

                for parse_result in parse_results:
                    start = parse_result.start
                    # ``end`` is inclusive, hence the -1.
                    end = start + parse_result.length - 1
                    if (start, end) in seen_spans:
                        continue

                    model_result = ModelResult()
                    model_result.start = start
                    model_result.end = end
                    model_result.text = parse_result.text
                    model_result.type_name = self.model_type_name
                    model_result.resolution = self.get_resolution(
                        parse_result.value)

                    seen_spans.add((start, end))
                    extraction_results.append(model_result)
        except Exception:  # pylint: disable=broad-except
            # Deliberate best-effort: keep the partial results, but leave a
            # trace instead of hiding the failure completely.
            logging.getLogger(__name__).warning(
                'Entity parsing aborted; returning partial results',
                exc_info=True)

        return extraction_results
Пример #8
0
    def get_result_from_regex(self, pattern: Pattern, source: str, num: float) -> DateTimeResolutionResult:
        """Build a duration result of ``num`` units when ``pattern`` matches.

        Args:
            pattern: Regex providing a ``unit`` named group.
            source: Text to search.
            num: Amount of the matched unit.

        Returns:
            A ``DateTimeResolutionResult``. It is returned unpopulated when
            ``pattern`` does not match or the unit is not a key of
            ``config.unit_map``; otherwise ``timex``, ``future_value``,
            ``past_value`` and ``success`` are set.
        """
        outcome = DateTimeResolutionResult()

        found: Match = regex.search(pattern, source)
        if found is None:
            return outcome

        # An unmatched unit group is treated as the empty string.
        unit_text: str = found.group('unit') or ''
        if unit_text not in self.config.unit_map:
            return outcome

        amount = QueryProcessor.float_or_int(num)
        unit = self.config.unit_map[unit_text]

        # Units shorter than a day get the 'T' designator in the timex.
        if self.is_less_than_day(unit):
            time_marker = 'T'
        else:
            time_marker = ''

        scaled = QueryProcessor.float_or_int(amount * self.config.unit_value_map[unit_text])

        outcome.timex = f'P{time_marker}{amount}{unit[0]}'
        outcome.future_value = scaled
        outcome.past_value = outcome.future_value
        outcome.success = True
        return outcome
Пример #9
0
    def parse(self, query: str) -> List[ModelResult]:
        """Extract entities from ``query`` and parse each one, best-effort.

        Args:
            query: Raw input text; it is preprocessed before extraction.

        Returns:
            One entry per extracted result, produced by the private
            single-result parser. If extraction or any single parse raises,
            the failure is logged and an empty list is returned.
        """
        import logging

        query = QueryProcessor.preprocess(query, True)
        results = []

        try:
            extract_results = self.extractor.extract(query)
            # Assigned only once every item parsed, so a failure part-way
            # through leaves ``results`` empty.
            results = [self.__single_parse(item) for item in extract_results]
        except Exception:  # pylint: disable=broad-except
            # Deliberate best-effort: return nothing, but leave a trace
            # instead of hiding the failure completely.
            logging.getLogger(__name__).warning(
                'Entity parsing failed; returning no results', exc_info=True)

        return results
Пример #10
0
    def __init__(self):
        """Populate the time-zone regexes, name lists and string matchers.

        All data comes from ``TimeZoneDefinitions``. Assignment order matters:
        the time-zone matcher is built from ``self.full_name_list`` and
        ``self.abbreviations_list``, which are accessors not visible here
        (presumably exposing the underscore attributes set just before).
        """
        super().__init__()

        definitions = TimeZoneDefinitions

        self._direct_utc_regex = RegExpUtility.get_safe_reg_exp(
            definitions.DirectUtcRegex)
        self._abbreviations_list = list(definitions.AbbreviationsList)
        self._full_name_list = list(definitions.FullNameList)
        self._timezone_matcher = TimeZoneUtility.build_matcher_from_lists(
            self.full_name_list, self.abbreviations_list)
        self._location_time_suffix_regex = RegExpUtility.get_safe_reg_exp(
            definitions.LocationTimeSuffixRegex)
        self._location_matcher = StringMatcher()
        self._ambiguous_timezone_list = list(definitions.AmbiguousTimezoneList)

        # Locations are lower-cased and stripped of diacritics before they
        # are loaded into the matcher.
        normalized_locations = [
            QueryProcessor.remove_diacritics(location.lower())
            for location in definitions.MajorLocations
        ]
        self._location_matcher.init(normalized_locations)
Пример #11
0
    def parse(self, query: str, reference: datetime = None) -> List[ModelResult]:  # pylint: disable=W0221
        """Extract and parse date/time entities relative to ``reference``.

        Args:
            query: Raw input text; it is preprocessed before extraction.
            reference: Passed through to both the extractor and the parser.

        Returns:
            One ``ModelResult`` per parsed entity. A parse result whose value
            is a list contributes each element of that list instead of itself.
        """
        query = QueryProcessor.preprocess(query)

        flattened = []
        for extracted in self.extractor.extract(query, reference):
            parsed = self.parser.parse(extracted, reference)
            if isinstance(parsed.value, list):
                # The value holds several results: flatten them in.
                flattened.extend(parsed.value)
            else:
                flattened.append(parsed)

        return list(map(self.__to_model_result, flattened))
Пример #12
0
    def merge_two_time_points(self, source: str,
                              reference: datetime) -> DateTimeResolutionResult:
        """Combine two time points found in ``source`` into one time period.

        Two time extractions are required. When only one is found, an integer
        elsewhere in the text is promoted to a time point, provided the text
        between it and the extracted time matches ``config.till_regex``.

        Args:
            source: Text to resolve.
            reference: Passed to the time extractor and the time parser.

        Returns:
            A ``DateTimeResolutionResult``. It is returned unpopulated when
            two time points cannot be obtained or either fails to parse.
            On success it carries the timex ``(start,end,PT..H..M)``, a
            ``ResolutionStartEnd`` as both future and past value, and the two
            parse results in ``sub_date_time_entities``.
        """
        result = DateTimeResolutionResult()
        ers = self.config.time_extractor.extract(source, reference)

        if len(ers) != 2:
            # Starts False so the check below really rejects the input when
            # no second time point is found. It used to start True, which
            # made that check dead code.
            valid_time_number = False

            if len(ers) == 1:
                time_er = ers[0]
                num_ers = self.config.integer_extractor.extract(source)

                for num in num_ers:
                    middle_begin = 0
                    middle_end = 0

                    # Despite its name, ``middle_end`` holds the LENGTH of
                    # the text between the two spans.
                    if num.start > time_er.start + time_er.length:
                        # The number comes after the time point.
                        middle_begin = time_er.start + time_er.length
                        middle_end = num.start - middle_begin
                    elif num.start + num.length < time_er.start:
                        # The number comes before the time point.
                        middle_begin = num.start + num.length
                        middle_end = time_er.start - middle_begin

                    # Check whether the text between the time point and the
                    # number is a connector string.
                    middle_str = source[middle_begin:middle_begin + middle_end]
                    if regex.search(self.config.till_regex,
                                    middle_str) is not None:
                        num.type = Constants.SYS_DATETIME_TIME
                        ers.append(num)
                        valid_time_number = True
                        break

                ers = sorted(ers, key=lambda x: x.start)

            # From here on ``ers`` holds exactly two entries: the extracted
            # time and the promoted number.
            if not valid_time_number:
                return result

        pr1 = self.config.time_parser.parse(ers[0], reference)
        pr2 = self.config.time_parser.parse(ers[1], reference)

        if pr1.value is None or pr2.value is None:
            return result

        # NOTE(review): a comment ending in 'ampm' presumably marks a time
        # whose AM/PM half was not stated - confirm against the time parser.
        ampm_str1: str = pr1.value.comment
        ampm_str2: str = pr2.value.comment
        begin_time: datetime = pr1.value.future_value
        end_time: datetime = pr2.value.future_value

        # Shift the end by 12 hours when that is what places it after the
        # start.
        if ampm_str2 and ampm_str2.endswith(
                'ampm') and end_time <= begin_time and end_time + timedelta(
                    hours=12) > begin_time:
            end_time = end_time + timedelta(hours=12)
            pr2.value.future_value = end_time
            pr2.timex_str = f'T{end_time.hour}'
            if end_time.minute > 0:
                pr2.timex_str = f'{pr2.timex_str}:{end_time.minute}'

        # Shift the start by 12 hours when the end is more than 12 hours
        # after it.
        if ampm_str1 and ampm_str1.endswith(
                'ampm') and end_time > begin_time + timedelta(hours=12):
            begin_time = begin_time + timedelta(hours=12)
            pr1.value.future_value = begin_time
            pr1.timex_str = f'T{begin_time.hour}'
            if begin_time.minute > 0:
                pr1.timex_str = f'{pr1.timex_str}:{begin_time.minute}'

        # An end before the start means the period runs into the next day.
        if end_time < begin_time:
            end_time = end_time + timedelta(days=1)

        span_seconds = (end_time - begin_time).total_seconds()
        hours = QueryProcessor.float_or_int(span_seconds // 3600)
        minutes = QueryProcessor.float_or_int(span_seconds / 60 % 60)

        hours_str = f'{hours}H' if hours > 0 else ''
        minutes_str = f'{minutes}M' if 0 < minutes < 60 else ''
        result.timex = f'({pr1.timex_str},{pr2.timex_str},PT{hours_str}{minutes_str})'
        result.future_value = ResolutionStartEnd(begin_time, end_time)
        result.past_value = ResolutionStartEnd(begin_time, end_time)
        result.success = True

        # The period keeps the 'ampm' comment only when both ends carry it.
        if ampm_str1 and ampm_str1.endswith(
                'ampm') and ampm_str2 and ampm_str2.endswith('ampm'):
            result.comment = 'ampm'

        result.sub_date_time_entities = [pr1, pr2]
        return result