Exemplo n.º 1
0
    def __timeEx(self):
        """
        Extract time expressions from ``self.target``.

        ``self.pattern`` is run over ``self.target``; matches that touch each
        other (one starts exactly where the previous one ended) are glued
        into a single string, and every resulting string is wrapped in a
        ``TimeUnit``.

        :return: whatever ``self.__filterTimeUnit`` returns for the list of
                 ``TimeUnit`` objects built here
        """
        fragments = []
        prev_end = -1
        for hit in self.pattern.finditer(self.target):
            if hit.start() == prev_end:
                # Directly adjacent to the previous match: extend it.
                fragments[-1] = fragments[-1] + hit.group()
            else:
                fragments.append(hit.group())
            prev_end = hit.end()

        # Time context: the previously recognised time is the context for the
        # next one, so that in "Saturday 3 o'clock to 5 o'clock" the second
        # "5 o'clock" is also resolved as being on Saturday.
        units = []
        context_tp = TimePoint()
        for fragment in fragments:
            unit = TimeUnit(fragment, self, context_tp)
            units.append(unit)
            context_tp = unit.tp

        return self.__filterTimeUnit(units)
Exemplo n.º 2
0
    def __timeEx(self):
        """
        Extract time expressions from ``self.target``, sentence by sentence.

        URLs in ``self.target`` are first replaced by the placeholder
        ``[url]``; the text is then passed through ``cut_sent`` and split on
        newlines. Within each sentence, ``self.pattern`` matches that touch
        each other (one starts exactly where the previous one ended) are
        merged into a single string, and every resulting string is wrapped in
        a ``TimeUnit``.

        Side effect: for every sentence that contains at least one match,
        ``self.isPreferFuture`` is set to whether that sentence contains the
        character '将'.

        :return: whatever ``self.__filterTimeUnit`` returns for the list of
                 ``TimeUnit`` objects collected over all sentences
        """
        # Raw strings: the pattern contains ``\(`` and ``\)``, which are
        # invalid escape sequences in a normal string literal. The resulting
        # pattern text is unchanged.
        url_pattern = (
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|'
            r'(?:%[0-9a-fA-F][0-9a-fA-F]))+'
        )
        sents = cut_sent(re.sub(url_pattern, '[url]', self.target)).split('\n')

        res = []
        for sent in sents:
            # Collect the matched strings of this sentence, merging matches
            # that are directly adjacent to each other.
            temp = []
            endline = -1
            for m in self.pattern.finditer(sent):
                if m.start() == endline:
                    temp[-1] += m.group()
                else:
                    temp.append(m.group())
                endline = m.end()

            if not temp:
                # No time expression here; leave self.isPreferFuture as is.
                continue

            # A sentence containing '将' is flagged as preferring the future.
            isPreferFuture = '将' in sent
            self.isPreferFuture = isPreferFuture

            # Time context: the previously recognised time is the context for
            # the next one, so that in "Saturday 3 o'clock to 5 o'clock" the
            # second "5 o'clock" is also resolved as being on Saturday.
            # The context starts fresh for every sentence.
            contextTp = TimePoint()
            for expr in temp:
                unit = TimeUnit(expr, self, contextTp, sent, isPreferFuture)
                res.append(unit)
                # Take the context from the unit just built. Indexing ``res``
                # with the per-sentence counter would pick a unit belonging
                # to an earlier sentence, because ``res`` spans all sentences.
                contextTp = unit.tp

        return self.__filterTimeUnit(res)
Exemplo n.º 3
0
    def __timeEx(self):
        """
        Extract time expressions from ``self.target``.

        ``self.pattern`` is run over ``self.target``; matches that touch each
        other (one starts exactly where the previous one ended) are glued
        into a single string, and every resulting string is wrapped in a
        ``TimeUnit``. After each unit is built, ``self.timeBase`` is
        overwritten with the current time.

        :return: a two-element list ``[strings, units]``: the merged matched
                 strings, and whatever ``self.__filterTimeUnit`` returns for
                 the ``TimeUnit`` objects built from them
        """
        pieces = []
        last_end = -1
        for found in self.pattern.finditer(self.target):
            text = found.group()
            if found.start() == last_end:
                # Directly adjacent to the previous match: extend it.
                pieces[-1] = pieces[-1] + text
            else:
                pieces.append(text)
            last_end = found.end()

        # Time context: the previously recognised time is the context for the
        # next one, so that in "Saturday 3 o'clock to 5 o'clock" the second
        # "5 o'clock" is also resolved as being on Saturday.
        units = []
        context_tp = TimePoint()
        for piece in pieces:
            # TimeUnit receives this object itself as its second argument.
            unit = TimeUnit(piece, self, context_tp)
            units.append(unit)
            context_tp = unit.tp
            # Reset the base time to "now" after every unit.
            # NOTE(review): presumably this undoes changes TimeUnit makes to
            # self.timeBase - confirm; it also discards any earlier base.
            self.timeBase = arrow.get(arrow.now()).format('YYYY-M-D-H-m-s')

        return [pieces, self.__filterTimeUnit(units)]
Exemplo n.º 4
0
    def __timeEx(self):
        """
        Extract time expressions from ``self.target``.

        ``self.pattern`` is run over ``self.target``; matches that touch each
        other (one starts exactly where the previous one ended) are glued
        into a single string. The merged strings are printed to standard
        output and each one is then wrapped in a ``TimeUnit``.

        :return: whatever ``self.__filterTimeUnit`` returns for the list of
                 ``TimeUnit`` objects built here
        """
        extracted = []
        boundary = -1
        for match in self.pattern.finditer(self.target):
            touches_previous = match.start() == boundary
            if touches_previous:
                # Directly adjacent to the previous match: extend it.
                extracted[-1] = extracted[-1] + match.group()
            else:
                extracted.append(match.group())
            boundary = match.end()

        # Time context: the previously recognised time is the context for the
        # next one, so that in "Saturday 3 o'clock to 5 o'clock" the second
        # "5 o'clock" is also resolved as being on Saturday.
        context_tp = TimePoint()
        print('time_extractor:', extracted)

        units = []
        for expression in extracted:
            # TimeUnit receives this object itself as its second argument.
            unit = TimeUnit(expression, self, context_tp)
            units.append(unit)
            context_tp = unit.tp

        return self.__filterTimeUnit(units)