Exemplo n.º 1
0
class QueryParser(object):
    def __init__(self):
        self.nlp = BosonNLP(bosonnlp_token)

    def parse(self, query_string):
        """
        input:
        7月22号 北京到上海的高铁票
        output:
        [{'entity': [[0, 3, 'time'], [3, 4, 'location'], [5, 6, 'location']], # 需要理解实体出现的模式,这块需要理解上下文
        'tag': ['t', 'm', 'q', 'ns', 'p', 'ns', 'ude', 'n', 'n'],
         'word': ['7月', '22', '号', '北京', '到', '上海', '的', '高铁', '票']}]
        """
        result = self.nlp.ner(query_string)[0]
        words = result['word']
        tags = result['tag']
        entities = result['entity']
        return (words, entities, tags)

    def get_entity(self, parsed_words, index_tuple):
        """
        获取已识别的实体
        采用filter
        参考 python cookbook部分

        input:
            entities : 二元组
            parsed_words : 解析好的词组
        """
        return parsed_words[index_tuple[0]:index_tuple[1]]

    def format_entities(self, entities):
        """
        给元组命名
        """
        namedentity = collections.namedtuple('namedentity', 'index_begin index_end entity_name')
        return [namedentity(entity[0], entity[1], entity[2]) for entity in entities]

    def get_format_time(self, time_entity):
        """
        output
        {'timestamp': '2013-02-28 16:30:29', 'type': 'timestamp'}
        """
        basetime = datetime.datetime.today()
        result = self.nlp.convert_time(
            time_entity,
            basetime)
        # print(result)
        timestamp = result["timestamp"]
        return timestamp.split(" ")[0]
Exemplo n.º 2
0
class QueryParser(object):
    def __init__(self):
        self.nlp = BosonNLP(bosonnlp_token)
    def parse(self, query_string):
        """
        input:
        7月22号 北京到上海的高铁票
        output:
        [{'entity': [[0, 3, 'time'], [3, 4, 'location'], [5, 6, 'location']], # 需要理解实体出现的模式,这块需要理解上下文
        'tag': ['t', 'm', 'q', 'ns', 'p', 'ns', 'ude', 'n', 'n'],
         'word': ['7月', '22', '号', '北京', '到', '上海', '的', '高铁', '票']}]
        """
        result = self.nlp.ner(query_string)[0]
        words = result['word']
        tags = result['tag']
        entities = result['entity']
        return (words,entities,tags)
    def get_entity(self,parsed_words,index_tuple):
        """
        获取已识别的实体
        采用filter
        参考 python cookbook部分

        input:
            entities : 二元组
            parsed_words : 解析好的词组
        """
        return parsed_words[index_tuple[0]:index_tuple[1]]

    def format_entities(self,entities):
        """
        给元组命名
        """
        namedentity = collections.namedtuple('namedentity','index_begin index_end entity_name')
        return [namedentity(entity[0],entity[1],entity[2]) for entity in entities]

    def get_format_time(self,time_entity):
        """
        output
        {'timestamp': '2013-02-28 16:30:29', 'type': 'timestamp'}
        """
        basetime = datetime.datetime.today()
        result = self.nlp.convert_time(
            time_entity,
            basetime)
        #print(result)
        timestamp = result["timestamp"]
        return timestamp.split(" ")[0]