Пример #1
0
def checkSingel():
    """Print every position at which ``single.txt`` and ``single2.txt`` differ.

    Both files are loaded with ``read_file`` from ``<project_path>/dm`` and
    compared entry by entry. Each mismatch is printed as ``<old>===<new>``.

    When one file has more entries than the other, the missing side is
    treated as an empty string. Surplus entries are therefore reported as
    differences instead of raising ``IndexError`` (second file shorter) or
    being silently ignored (second file longer).
    """
    from itertools import zip_longest

    old_singel = read_file(project_path + "/dm/single.txt")
    new_singel = read_file(project_path + "/dm/single2.txt")

    for old_line, new_line in zip_longest(old_singel, new_singel,
                                          fillvalue=""):
        if old_line != new_line:
            print(old_line + "===" + new_line)
Пример #2
0
    def __init__(self):
        """
        Initialise the matcher.

        1. Create the helper objects (LTP, compare, calculate, graph search).
        2. Load entities, entity types, properties and relations from the
           data files; every list is sorted longest-first.
        3. Parse ``pro.csv`` into ``self.aliasArray`` (property name -> list
           of aliases).
        4. Register the loaded vocabularies with jieba.
        """
        self.ltp_util = LTPUtil()
        self.compare_util = compareNLU()
        self.calculate_util = calculateNLU()
        self.graph_util = graphSearch()

        self.syntaxMatch = {
            'task_whether': [['是不是'], ['是', '吗'], ['有没有'], ['有', '吗'], ['是否'],
                             ['是否是'], ['会', '吗']]
        }

        # Entities and types are de-duplicated before sorting.
        self.instanceArray = sorted(
            set(read_file(project_path + "/data/allentity.csv")),
            key=len,
            reverse=True)
        self.typeArray = sorted(
            set(read_file(project_path + "/data/etype.csv")),
            key=len,
            reverse=True)

        self.standardPro = sorted(
            read_file(project_path + "/data/cleanpro.csv"),
            key=len,
            reverse=True)
        self.relArray = sorted(
            read_file(project_path + "/data/cleanrel.csv"),
            key=len,
            reverse=True)

        self.template_ent = {'国家': '日本', '湖泊': '洞庭湖', '河流': '长江'}

        self.commonPro = []
        self.aliasArray = {}

        # Each pro.csv line is expected to look like "name:alias1,alias2,...".
        for p in read_file(project_path + "/data/pro.csv"):
            temp_standard = p.split(":")
            # A line without ':' (for example a blank line) has no alias
            # section; skip it instead of failing on temp_standard[1].
            if len(temp_standard) < 2 or temp_standard[1] == '':
                continue

            temp_alias = temp_standard[1].split(",")
            if temp_alias[0] != '':
                self.aliasArray[temp_standard[0]] = temp_alias

        # NOTE(review): the first three calls pass in-memory lists rather
        # than file paths -- confirm the installed jieba version accepts that.
        jieba.load_userdict(self.instanceArray)
        jieba.load_userdict(self.standardPro)
        jieba.load_userdict(self.relArray)
        jieba.load_userdict(project_path + "/data/jieba_other.csv")
Пример #3
0
def getProEnt():
    """Split the entity types by whether they also occur in ``entpro.txt``.

    Entries of ``data/etype.csv`` that appear in ``entpro.txt`` are written,
    one per line, to ``in.txt``; all other entries go to ``out.txt``. Both
    output files are opened relative to the current working directory in
    write mode, so they are overwritten on every call, and they are closed
    (and flushed) when the function returns.
    """
    # A set gives constant-time membership tests for the loop below.
    entpro = set(read_file(project_path + "/entpro.txt"))
    enttype = read_file(project_path + "/data/etype.csv")

    with open("in.txt", "w") as inf, open("out.txt", "w") as outf:
        for sub in enttype:
            target = inf if sub in entpro else outf
            target.write(sub + "\n")
Пример #4
0
    def __init__(self):
        """
        Initialise the matcher.

        1. Load the graph entities and properties from the data files.
        2. Split the property file into standard property names, common
           (wildcard) properties and per-property aliases.
        3. Sort entities and properties longest-first.
        4. Define the stop words and the punctuation symbols to filter.
        """
        self.pattern_match = PatternMatch()
        self.ltp_util = LTPUtil()

        self.instanceArray = sorted(
            set(read_file(project_path + "/data/allentity.csv")),
            key=len,
            reverse=True)

        # Sort by the length of the property name (the text before ':').
        proArray = sorted(read_file(project_path + "/data/pro.csv"),
                          key=lambda i: len(i.split(":")[0]),
                          reverse=True)

        self.standardPro = []
        self.commonPro = []
        self.aliasPro = {}

        self.stopword = [
            "为什么", "什么", "如何", "谁", "多少", "几", "怎么着", "怎么样", "怎么", "怎样", "怎的",
            "怎", "哪里", "哪儿", "哪", "吗", "呢", "吧", "啊", "么"
        ]
        self.symbol = [
            ",", ",", ".", "。", "!", "!", "@", "#", "$", "%", "^", "&", "*",
            "(", "(", ")", ")", "{", "「", "}", "」", "[", "]", "【", "】", "、",
            "\\", "|", ";", ";", "<", ">", "?", "?", "`", "~", "·", "~", ":",
            ":", "*"
        ]

        jieba.load_userdict(project_path + "/data/allentity.csv")
        jieba.load_userdict(project_path + "/data/pro.csv")
        jieba.load_userdict(project_path + "/data/jieba_other.csv")

        # Each line is expected to look like "name:common,alias1,alias2,...".
        for p in proArray:
            temp_standard = p.split(":")
            self.standardPro.append(temp_standard[0])

            # A line without ':' (for example a blank line) has no alias
            # section; skip it instead of failing on temp_standard[1].
            if len(temp_standard) < 2 or temp_standard[1] == '':
                continue

            temp_alias = temp_standard[1].split(",")
            if temp_alias[0] != '':
                # NOTE(review): when the common property was already seen,
                # this also skips the alias registration below -- confirm
                # that this is intended.
                if temp_alias[0] in self.commonPro:
                    continue
                self.commonPro.append(temp_alias[0])
            if len(temp_alias) > 1 and temp_alias[1] != '':
                self.aliasPro[temp_standard[0]] = temp_alias[1:]
Пример #5
0
    def resetTripleByDeleteOld(self, filename, type):
        """
        Replace triples: delete the existing values of a (subject, predicate)
        pair, then add the new triples.

        The new triples are read from ``data/operate/add.txt``; every entry is
        split on single spaces into ``subject predicate object``. For each
        entry ``search_util.deleteTripleBySAP(subject, predicate)`` is called,
        then all collected triples are added in one
        ``addTripleToRepertory`` call and a marker line is appended to
        ``data/log/<type>.txt``.

        :param filename: currently unused -- the input file is always
                         ``data/operate/add.txt``
        :param type: forwarded to ``addTripleToRepertory`` and used as the
                     log file name
        :return: None
        """
        reset_list = read_file(project_path + "/data/operate/add.txt")

        sub_list = []
        pre_list = []
        obj_list = []
        for r in reset_list:
            triple = r.split(" ")
            sub = triple[0]
            pre = triple[1]
            obj = triple[2]

            self.search_util.deleteTripleBySAP(sub, pre)
            sub_list.append(sub)
            pre_list.append(pre)
            obj_list.append(obj)

        self.addTripleToRepertory(sub_list, pre_list, obj_list, type)

        # The context manager closes (and flushes) the log file; the original
        # handle was never closed.
        with open(project_path + "/data/log/" + type + ".txt", "a") as log:
            log.write("add by delete olds: \n")
Пример #6
0
    def __init__(self):
        """
        Initialise the matcher.

        1. Load the graph classes, entities and properties from the data
           files.
        2. Split the property file into standard property names, common
           (wildcard) properties and per-property aliases.
        3. Define the stop words and the punctuation symbols to filter.

        The parsed common properties and aliases are printed to stdout.
        """
        self.classArray = read_file(project_path + "/data/cleanclass.txt")
        self.instanceArray = read_file(project_path + "/data/entity.csv")
        proArray = read_file(project_path + "/data/pro.csv")
        self.standardPro = []
        self.commonPro = []
        self.aliasPro = {}

        self.stopword = ["为什么","什么","如何","谁","多少","几","怎么着","怎么样","怎么","怎样","怎的","怎",
                        "哪里","哪儿","哪","吗","呢","吧","啊","么"]
        self.symbol = [",",",",".","。","!","!","@","#","$",
                         "%","^","&","*","(","(",")",")","{","「","}","」","[","]","【","】","、","\\","|",";",
                       ";","<",">","?","?","`","~","·","~",":",":","*"]

        # Each line is expected to look like "name:common,alias1,alias2,...".
        for p in proArray:
            temp_standard = p.split(":")
            self.standardPro.append(temp_standard[0])

            # A line without ':' (for example a blank line) has no alias
            # section; skip it instead of failing on temp_standard[1].
            if len(temp_standard) < 2 or temp_standard[1] == '':
                continue

            temp_alias = temp_standard[1].split(",")
            if temp_alias[0] != '':
                # NOTE(review): when the common property was already seen,
                # this also skips the alias registration below -- confirm
                # that this is intended.
                if temp_alias[0] in self.commonPro:
                    continue
                self.commonPro.append(temp_alias[0])
            if len(temp_alias) > 1 and temp_alias[1] != '':
                self.aliasPro[temp_standard[0]] = temp_alias[1:]

        # Dump of the parsed result.
        for c in self.commonPro:
            print(c)
        print("=================================")
        for name, con in self.aliasPro.items():
            print(name)
            for c in con:
                print(c)
Пример #7
0
    def resetNewProToRepertory(self, old, new, new_py, type):
        """
        Create a new property and move the triples listed in a file onto it.

        ``data/pro/<type>/<old>.txt`` is read with ``read_file``. Its first
        entry is the label of the predicate to replace; every following entry
        is split on ``:`` into a subject label and an object value. The new
        property is created with ``search_util.addProperty(new, new_py)``,
        the old/new triple lists are sent to
        ``search_util.resetTripleToRepertory`` and the operation is appended
        to ``data/log/operate.txt`` and ``data/log/<type>.txt``.

        :param old: base name of the input file
        :param new: label of the property to create and switch to
        :param new_py: second argument of ``addProperty`` (presumably the
                       pinyin form of ``new`` -- confirm)
        :param type: sub-directory of ``data/pro`` and name of the log file
        :return: None
        """
        modify_list = []
        new_list = []
        old_list = read_file(project_path + "/data/pro/" + type + "/" + old +
                             ".txt")

        modify_pre = old_list[0]
        predicate = self.search_util.getPredicate(modify_pre)

        self.search_util.addProperty(new, new_py)
        new_predicate = self.search_util.getPredicate(new)

        for o in old_list[1:]:
            information = o.split(":")
            sub_label = information[0]
            print("sub_label", sub_label)
            subject = self.search_util.getSubject(sub_label)
            modify_list.append({
                "subject": subject,
                "predicate": predicate,
                "object": information[1]
            })
            new_list.append({
                "subject": subject,
                "predicate": new_predicate,
                "object": information[1]
            })

        # The repository call receives both lists as their str() form.
        data = {
            'repertoryName': 'geo4',
            'oldList': str(modify_list),
            'newList': str(new_list)
        }
        self.search_util.resetTripleToRepertory(data)

        # Context managers close (and flush) both log files; the original
        # handles were never closed.
        with open(project_path + "/data/log/operate.txt", "a") as operate:
            operate.write("add " + new + "\n")
            operate.write("=========================================\n")

        with open(project_path + "/data/log/" + type + ".txt", "a") as log:
            log.write("reset:  [" + type + "]" + modify_pre + "-->" + new +
                      "\n")
            log.write("=========================================\n")
Пример #8
0
    def addRelTripleToRepertory(self, filename, type):
        """
        Add the relation triples listed in a file to the repository.

        Every entry of ``data2/inf/<filename>.csv`` is split on single spaces
        into ``subject predicate object``. An entry is skipped when the
        subject already has that relation (per ``search_util.getRelList``) or
        when the object cannot be resolved by ``search_util.getSubject``.
        The remaining triples are sent to
        ``search_util.addRelTripleToRepertory`` in one call and each of them
        is appended to ``data/log/<type>.txt``.

        :param filename: base name of the input file
        :param type: name of the log file
        :return: None
        """
        # The log is opened first (as before) so an unusable log path fails
        # before the repository is touched; ``with`` closes the handle, which
        # the original never did.
        with open(project_path + "/data/log/" + type + ".txt", "a") as log:
            tripleList = read_file(project_path + "/data2/inf/" + filename +
                                   ".csv")
            add_tripleList = []
            added_labels = []
            for triple in tripleList:
                inf_arr = triple.split(" ")

                rel_list = self.search_util.getRelList(inf_arr[0])
                if inf_arr[1] in rel_list:
                    # The relation already exists for this subject.
                    print(inf_arr[0], inf_arr[1])
                    continue

                obje = self.search_util.getSubject(inf_arr[2])
                if obje is None:
                    continue

                added_labels.append((inf_arr[0], inf_arr[1], inf_arr[2]))

                subject = self.search_util.getSubject(inf_arr[0])
                predicate = self.search_util.getRelPredicate(inf_arr[1])

                print(subject, predicate, obje)
                add_tripleList.append({
                    "subject": subject,
                    "predicate": predicate,
                    "object": obje
                })

            self.search_util.addRelTripleToRepertory(add_tripleList)

            for sub, pre, obj in added_labels:
                log.write("add:  " + sub + "-" + pre + "-" + obj + "\n")
                log.write("=========================================\n")
Пример #9
0
    def checkCalculateDist(self, question):
        """
        Recognise a "distance between mountains" question.

        Every mountain and hill name found in the question (longest names are
        tried first) is replaced by the placeholder ``MOUNTAIN`` / ``HILL``;
        the rewritten question is then passed to
        ``self.calculate_dist_aiml.respond``.

        :param question: the question text
        :return: ``(None, None, None)`` when ``respond`` yields ``""`` or
                 ``None``; otherwise
                 ``("task_calculate_dist", {'entity': [...]}, answer)`` where
                 the entity list holds the matched hills followed by the
                 matched mountains
        """
        print(question)
        mountain_names = read_file(project_path + "/data/compare/mountain.csv")
        hill_names = read_file(project_path + "/data/compare/hill.csv")
        mountain_names = sorted(mountain_names, key=len, reverse=True)
        hill_names = sorted(hill_names, key=len, reverse=True)

        matched = {"MOUNTAIN": [], "HILL": []}
        # Mountains are masked before hills; each replacement is visible to
        # the checks that follow it.
        for placeholder, names in (("MOUNTAIN", mountain_names),
                                   ("HILL", hill_names)):
            for name in names:
                if name in question:
                    question = question.replace(name, placeholder)
                    matched[placeholder].append(name)

        ans = self.calculate_dist_aiml.respond(question)
        if ans == "" or ans is None:
            return None, None, None

        found = matched["HILL"] + matched["MOUNTAIN"]
        return "task_calculate_dist", {'entity': found}, ans
Пример #10
0
    def resetRelToRepertory(self, type, old, new):
        """
        Replace the predicate of the triples listed in a file with a relation.

        ``data/pro/<type>/<old>.txt`` is read with ``read_file``. Its first
        entry is the label of the predicate to replace; every following entry
        is split on ``:`` into a subject label and an object label. The old
        triples keep the object label as written, while the new triples use
        the object resolved through ``search_util.getSubject``. Both lists are
        sent to ``search_util.resetRelTripleToRepertory`` and the operation is
        appended to ``data/log/<type>.txt``.

        :param type: sub-directory of ``data/pro`` and name of the log file
        :param old: base name of the input file
        :param new: label of the relation that replaces the old predicate
        :return: None
        """
        modify_list = []
        new_list = []
        old_list = read_file(project_path + "/data/pro/" + type + "/" + old +
                             ".txt")

        modify_pre = old_list[0]
        predicate = self.search_util.getPredicate(modify_pre)
        new_predicate = self.search_util.getRelPredicate(new)
        for o in old_list[1:]:
            information = o.split(":")
            sub_label = information[0]
            obj_label = information[1]

            subject = self.search_util.getSubject(sub_label)
            obj = self.search_util.getSubject(obj_label)
            modify_list.append({
                "subject": subject,
                "predicate": predicate,
                "object": obj_label
            })
            new_list.append({
                "subject": subject,
                "predicate": new_predicate,
                "object": obj
            })

        # The repository call receives both lists as their str() form.
        data = {
            'repertoryName': 'geo4',
            'oldList': str(modify_list),
            'newList': str(new_list)
        }

        self.search_util.resetRelTripleToRepertory(data)

        # ``with`` closes (and flushes) the log file; the original handle was
        # never closed.
        with open(project_path + "/data/log/" + type + ".txt", "a") as log:
            log.write("reset:  [" + type + "]" + modify_pre + "-->" + new +
                      "\n")
            log.write("=========================================\n")
Пример #11
0
    def resetProForPro(self, pro_old):
        """
        Batch-rewrite the predicate of the triples listed in a file.

        Every entry of ``data/prosearch/<pro_old>.csv`` is split on ``,`` into
        ``subject, object, new_property`` and passed to
        ``self.resetProForTriple``; each change is appended to
        ``data/log/<pro_old>.txt``. The file lists triples for the property
        as a whole (the original docstring contrasts this with
        ``resetProToRepertory``, which is limited to one entity type).

        :param pro_old: label of the property being replaced; also the base
                        name of the input file and of the log file
        :return: None
        """
        # ``with`` closes (and flushes) the log file; the original handle was
        # never closed.
        with open(project_path + "/data/log/" + pro_old + ".txt", "a") as log:
            triple_array = read_file(project_path + "/data/prosearch/" +
                                     pro_old + ".csv")
            for line in triple_array:
                data = line.split(",")
                subj = data[0]
                obje = data[1]
                pro_new = data[2]
                self.resetProForTriple(subj, obje, pro_old, pro_new)

                log.write("reset: [" + subj + ":" + pro_old + "]-->" +
                          pro_new + "\n")
                log.write("=========================================\n")
Пример #12
0
    def getStandard(self, words, father):
        """
        Look up the standard question for ``words`` in a template library.

        ``template_library/<father>.csv`` is read as a sequence of entries in
        which a ``==========`` entry ends a group of equivalent questions.
        The first entry of a group is its standard form.

        :param words: the question to look up (must equal a group entry)
        :param father: base name of the template file
        :return: the first entry of the first group containing ``words``, or
                 ``None`` when no group contains it
        """
        separator = "=========="
        template_questions = read_file(project_path + "/template_library/" +
                                       father + ".csv")

        template_arr = []
        for template in template_questions:
            if template != separator:
                template_arr.append(template)
                continue

            # End of a group: answer from it or start collecting the next one.
            if words in template_arr:
                return template_arr[0]
            template_arr = []

        # A final group without a closing separator used to be ignored.
        if words in template_arr:
            return template_arr[0]
        return None
Пример #13
0
    def questionPatternByType(self, type):
        """
        Collect the question patterns of every property of an entity type.

        The pattern file ``dealNLU/lake.txt`` is read as a flat list. For each
        property returned by ``self.getProByType(type)`` that occurs in the
        list, the entries after its first occurrence are collected until an
        ``end`` entry (or the end of the list) is reached.

        :param type: the entity type
        :return: the collected patterns, in property order
        """
        lines = read_file(project_path + "/dealNLU/lake.txt")
        properties = self.getProByType(type)

        patterns = []
        for prop in properties:
            try:
                start = lines.index(prop)
            except ValueError:
                # The property has no section in the pattern file.
                continue

            for entry in lines[start + 1:]:
                if entry == "end":
                    break
                patterns.append(entry)

        return patterns
Пример #14
0
    def __init__(self):
        """
        模版匹配器分为三个等级
        1.句式分类,分为是否问题,属性或实体询问问题,子集询问问题
        2.nlu处理后的模版匹配问题
        3.多个实体的模版匹配问题
        """
        self.syntaxMatch = {
            'task_whether': [['是不是'], ['是', '吗'], ['有没有'], ['有', '吗'], ['是否']],
            'task_subset': [['有哪些'], ['有什么'], ['哪些']]
        }

        self.nluMatch = {
            'task_common': [['ent', 'pro'], ['ent']],
            'task_difinition': [['ent'], ['ent-pro'], ['pro-ent']],
            'task_rel': [['ent', 'ent', 'pro']],
            'task_btw_ent': [['ent', 'ent']]
        }

        self.country = read_file(project_path + "/data/country.csv")
        """
        实体+属性
        喜马拉雅山的特征是什么
        什么是喜马拉雅山的特征
        喜马拉雅山有哪些特征
        """
        self.entproN = ['ent-pro-V-R', 'R-V-ent-pro', 'ent-V-R-pro']
        """
        喜马拉雅山怎么形成的
        怎么形成喜马拉雅山的
        喜马拉雅山是怎么形成的 'ent-V-R-pro'
        """
        self.entproV = ['ent-R-hed&pro', 'R-hed&pro-ent', 'ent-V-R-pro']
        """
        实体+关系
        中国的首都是什么
        什么是中国的首都
        中国的首都在哪
        
        中国的首都是什么城市
        什么城市是中国的首都
        中国的首都在哪个城市
        
        ent-rel
        
        俄罗斯位于哪里
        俄罗斯位于什么洲
        """

        self.entrelN = ['ent-rel-V-R', 'R-V-ent-rel']
        self.entrelNN = ['ent-rel-V-R-type', 'R-type-V-ent-rel']
        self.entrelV = ['ent-hed&rel-R']
        self.entrelVV = ['ent-hed&rel-R-type']
        """
        关系+关系值(实体)+实体类型-->实体
        
        向下找子类,得到相关属性/关系的属性关系值,匹配属性/关系的值,找到对应的实体
        """
        """
        北京是哪个国家的首都,哪个国家的首都是北京,首都是北京的是哪个国家,首都是北京的是哪个国家
        位于俄罗斯的淡水湖有哪些,有哪些淡水湖位于俄罗斯,哪些淡水湖位于俄罗斯,有哪些位于俄罗斯的淡水湖
        """
        self.relEtypeN = [
            'ent-V-R-type-rel', 'R-type-rel-V-ent', 'rel-V-ent-R-type'
        ]

        self.relEtypeV = [
            'rel-ent-type-V-R', 'V-R-type-rel-ent', 'R-type-hed&rel-ent',
            'V-R-rel-ent-type'
        ]
        """
        属性名+属性值+实体类型-->实体
        闽是哪个省的简称 哪个省的简称是闽 
        
        向下找子类,得到相关属性/关系的属性关系值,匹配属性/关系的值,找到对应的实体
        
        """

        self.pronvEtype = ['V-R-type-pro', 'R-type-pro-V', 'pro-type-V-R']
        self.pronvEnt = ['V-R-ent-pro', 'R-ent-pro-V', 'pro-ent-V-R']
        """
Пример #15
0
    def checkCalculateMost(self, question):
        """
        Recognise a superlative question (largest, longest, highest, ...).

        Location names found in the question limit the search scope: at most
        one country (the longest match), plus every matching city, province
        and state; ``世界`` is used when nothing matches. The question is then
        passed to ``self.calculate_most_aiml.respond`` and the keywords in
        the answer (``lake``/``river``/``mountain``/``sea`` plus attribute and
        ``most``/``least`` markers) are translated into entity types,
        predicates and superlative adjectives.

        :param question: the question text
        :return: ``(None, None, None)`` when the answer is empty;
                 ``('task_calculate', 'task_calculate_ask', ans)`` when
                 ``self.is_Chinese(ans)`` is true; otherwise
                 ``("task_calculate_most", info, ans)`` where ``info`` has the
                 keys ``limit``, ``ask``, ``predicate`` and ``predicate_adj``
        """
        country_list = read_file(project_path + "/data/calculate/country.csv")
        city_list = read_file(project_path + "/data/calculate/city.csv")
        province_list = read_file(project_path +
                                  "/data/calculate/province.csv")
        state_list = read_file(project_path + "/data/calculate/state.csv")

        # Longest names first, so the most specific name wins.
        country_list = sorted(country_list, key=len, reverse=True)
        city_list = sorted(city_list, key=len, reverse=True)
        province_list = sorted(province_list, key=len, reverse=True)
        state_list = sorted(state_list, key=len, reverse=True)

        limit_location = []
        ask_type = []
        ask_predicate = []
        predicate_adj = []

        # Only the first matching country is kept ...
        first_country = next(
            (name for name in country_list if name in question), None)
        if first_country is not None:
            limit_location.append(first_country)

        # ... but every matching city, province and state.
        for candidates in (city_list, province_list, state_list):
            limit_location.extend(
                name for name in candidates if name in question)

        if not limit_location:
            limit_location.append("世界")

        ans = self.calculate_most_aiml.respond(question)

        if ans == "":
            return None, None, None
        if self.is_Chinese(ans):
            return 'task_calculate', 'task_calculate_ask', ans

        def add_extreme(most_word, least_word):
            # Record the adjective selected by the 'most'/'least' marker.
            if 'most' in ans:
                predicate_adj.append(most_word)
            elif 'least' in ans:
                predicate_adj.append(least_word)

        def add_first_attribute(table):
            # Only the first attribute keyword present in the answer counts.
            for token, predicate, most_word, least_word in table:
                if token in ans:
                    ask_predicate.append(predicate)
                    add_extreme(most_word, least_word)
                    break

        if 'lake' in ans:
            if '淡水湖' in question:
                ask_type.append('淡水湖')
            elif '咸水湖' in question:
                ask_type.append('咸水湖')
            else:
                ask_type.append('湖泊')

            add_first_attribute((
                ('area', '面积', '最大', '最小'),
                ('vol', '蓄水量', '最大', '最小'),
                ('deep', '深度', '最深', '最浅'),
            ))

        elif 'river' in ans:
            ask_type.append('河流')

            add_first_attribute((
                ('area', '面积', '最大', '最小'),
                ('flow', '流量', '最大', '最小'),
                ('long', '长度', '最长', '最短'),
            ))

        elif 'mountain' in ans:
            ask_type.extend(['山峰', '山脉'])

            # Mountain attributes are independent: several may apply.
            if 'high' in ans:
                ask_predicate.append('海拔')
                add_extreme('最高', '最低')

            for token, predicate, adjective in (
                    ('south', '纬度', '最南'),
                    ('north', '纬度', '最北'),
                    ('east', '经度', "最东"),
                    ('west', '经度', '最西'),
            ):
                if token in ans:
                    ask_predicate.append(predicate)
                    predicate_adj.append(adjective)

            if 'long' in ans:
                ask_predicate.extend(['长度', ''])
                predicate_adj.append('最长')

        elif 'sea' in ans:
            ask_type.append('海洋')

            # Sea attributes are independent as well.
            if 'area' in ans:
                ask_predicate.append('面积')
                add_extreme('最大', '最小')
            if 'deep' in ans:
                ask_predicate.extend(['深度', ''])
                add_extreme('最深', '最浅')

        ent_dict = {
            'limit': limit_location,
            'ask': ask_type,
            'predicate': ask_predicate,
            'predicate_adj': predicate_adj
        }

        return "task_calculate_most", ent_dict, ans
Пример #16
0
            tripleList.append({
                "subject": subject,
                "predicate": predicate,
                "object": "<" + object + ">"
            })
        self.search_util.deleteRelToRepertory(tripleList)
        for i in range(len(subj)):
            log.writelines("delete:  " + subj[i] + "-" + pred[i] + "-" +
                           obje[i] + "\n")
            log.writelines("=========================================\n")


if __name__ == '__main__':
    g = graphModify()
    #rel_add = read_file(project_path + "/data/operate/add.txt")
    rel_add = read_file(project_path + "/data3/complete/海洋_rel.csv")
    #pro_add = read_file(project_path + "/data3/complete/城市_pro.csv")
    rel_delete = read_file(project_path + "/data/operate/deleterel.txt")
    pro_delete = read_file(project_path + "/data/operate/delete.txt")

    #g.getInfForComplete('河流')

    #g.resetProForFile('河流pro')
    #g.search_util.addProperty("气候特征",'qihoutezheng')

    #g.resetProToNewOne('养殖对象','养殖物种')

    #rel_reset = read_file(project_path + "data/operate/relreset.txt")
    #pro_reset = read_file(project_path + "data/operate/reset.txt")
    """
    for r in rel_add:
Пример #17
0
    def checkCompareBySchema(self,question):
        """
        Detect a comparison question from the entities it mentions.

        The last comparison word in the question (scanning from the end)
        decides the direction: ``more`` for 大/深/高/东/南/西/北/长, ``less``
        for 小/浅/低/短. Entity names are then masked category by category
        (mountain, hill, lake, river, sea; longest names first). As soon as a
        category yields two or more entities -- for hills, also when hills
        and mountains together reach two -- the compared property is looked
        up with ``self.graphsearch.getCompareKeyword`` and the result is
        returned.

        :param question: the question text
        :return: ``("task_compare", info)`` with ``info`` holding the keys
                 ``entity``, ``property`` and ``flag`` when a comparison is
                 found; otherwise the bare string ``"task_normal"``
        """
        more_words = ['大','深','高','东','南','西','北','长']
        less_words = ['小','浅','低','短']

        mountain_list = read_file(project_path+"/data/compare/mountain.csv")
        lake_list = read_file(project_path+"/data/compare/lake.csv")
        hill_list = read_file(project_path+"/data/compare/hill.csv")
        river_list = read_file(project_path+"/data/compare/river.csv")
        sea_list = read_file(project_path+"/data/compare/sea.csv")

        mountain_list = sorted(mountain_list, key=len, reverse=True)
        lake_list = sorted(lake_list, key=len, reverse=True)
        hill_list = sorted(hill_list, key=len, reverse=True)
        river_list = sorted(river_list, key=len, reverse=True)
        sea_list = sorted(sea_list, key=len, reverse=True)

        keyword = ""
        flag = ""
        for ch in reversed(question):
            if ch in more_words:
                keyword, flag = ch, "more"
                break
            if ch in less_words:
                keyword, flag = ch, "less"
                break

        def mask(text, names, placeholder):
            # Replace every name found in text; return (new text, found names).
            hits = []
            for name in names:
                if name in text:
                    text = text.replace(name, placeholder)
                    hits.append(name)
            return text, hits

        def compare_result(entities, anchor):
            # The property is looked up for one representative entity.
            attr = self.graphsearch.getCompareKeyword(keyword, anchor)
            return "task_compare", {'entity': entities, 'property': attr, "flag": flag}

        question, ask_mountain = mask(question, mountain_list, "MOUNTAIN")
        if len(ask_mountain) >= 2:
            return compare_result(ask_mountain, ask_mountain[0])

        question, ask_hill = mask(question, hill_list, "HILL")
        # Fewer than two mountains were found above, so a combined count of
        # two guarantees at least one hill.
        if len(ask_hill) >= 2 or len(ask_mountain + ask_hill) >= 2:
            return compare_result(ask_hill + ask_mountain, ask_hill[0])

        question, ask_lake = mask(question, lake_list, "LAKE")
        if len(ask_lake) >= 2:
            return compare_result(ask_lake, ask_lake[0])

        question, ask_river = mask(question, river_list, "RIVER")
        if len(ask_river) >= 2:
            return compare_result(ask_river, ask_river[0])

        question, ask_sea = mask(question, sea_list, "SEA")
        if len(ask_sea) >= 2:
            return compare_result(ask_sea, ask_sea[0])

        return "task_normal"
Пример #18
0
    def checkCompare(self,question):
        """
        Match the question against the comparison templates.

        Entity names found in the question are replaced by category
        placeholders (``MOUNTAIN``, ``LAKE``, ``HILL``, ``RIVER``, ``SEA``, in
        that order, longest names first) and the rewritten question is passed
        to ``self.compare_aiml.respond``.

        :param question: the question text
        :return: ``(None, None, None)`` when the answer is empty;
                 ``('task_compare', 'task_compare_ask', ans)`` when
                 ``self.is_Chinese(ans)`` is true (per the original notes, a
                 counter-question); otherwise
                 ``("task_compare", {'entity': ..., 'property': ...}, ans)``
                 with both values taken from ``self.getCompareInfo``
        """
        mountain_list = read_file(project_path+"/data/compare/mountain.csv")
        lake_list = read_file(project_path+"/data/compare/lake.csv")
        hill_list = read_file(project_path+"/data/compare/hill.csv")
        river_list = read_file(project_path+"/data/compare/river.csv")
        sea_list = read_file(project_path+"/data/compare/sea.csv")

        mountain_list = sorted(mountain_list, key=len, reverse=True)
        lake_list = sorted(lake_list, key=len, reverse=True)
        hill_list = sorted(hill_list, key=len, reverse=True)
        river_list = sorted(river_list, key=len, reverse=True)
        sea_list = sorted(sea_list, key=len, reverse=True)

        # The masking order matters: each replacement changes the text the
        # following categories are matched against.
        masking_plan = (
            ('mountain', "MOUNTAIN", mountain_list),
            ('lake', "LAKE", lake_list),
            ('hill', "HILL", hill_list),
            ('river', "RIVER", river_list),
            ('sea', "SEA", sea_list),
        )
        found = {}
        for category, placeholder, names in masking_plan:
            hits = []
            for name in names:
                if name in question:
                    question = question.replace(name, placeholder)
                    hits.append(name)
            found[category] = hits

        ent_dict = {
            'mountain': found['mountain'],
            'hill': found['hill'],
            'river': found['river'],
            'lake': found['lake'],
            'sea': found['sea'],
        }

        ans = self.compare_aiml.respond(question)
        if ans == "":
            return None,None,None
        if self.is_Chinese(ans):
            return 'task_compare','task_compare_ask',ans

        entity, compared_property = self.getCompareInfo(ans, ent_dict)
        return "task_compare", {'entity': entity, 'property': compared_property}, ans
Пример #19
0
        formed_ent = []


        son_list = self.search_util.getEntityByType(entityType)

        for son in son_list:
            son_location = self.getLocation(son)
            if location in son_location:
                formed_ent.append(son)
        return formed_ent



if __name__ == '__main__':
    # Manual check: list the lakes (湖泊) located in the USA (美国).
    inferencer = localtionInfernce()
    river = read_file(project_path+"/data/compare/river.csv")
    print(inferencer.getLocationByLimit('湖泊', '美国'))
    # Interactive variant, kept disabled as in the original:
    #     while(1):
    #         r = input()
    #         ans = l.getLocationByLimit('湖泊','美国')
    #         print(ans)