def getBestAnswer(answer_list, all_key_set, key_list):
    """Select the highest-scoring answers and format them for display.

    Every candidate in ``answer_list`` is read as ``entry[0]`` (the answer's
    label) and ``entry[1]`` (the answer text). A candidate earns 3 points for
    each keyword in ``all_key_set`` that equals its label, and 1 point for
    each keyword that is found in the result of
    ``keywordextract.keywordExtract`` applied to its text.

    Args:
        answer_list: sequence of indexable ``(label, text)`` entries.
        all_key_set: iterable of keywords used for scoring.
        key_list: sequence parallel to ``answer_list``; ``key_list[i]`` is
            prefixed to the label of candidate ``i`` in the output.

    Returns:
        A list with one string ``<key>的<label>为:<text>`` for every candidate
        that shares the top score, in their original input order. An empty
        list is returned when ``answer_list`` is empty.
    """
    if len(answer_list) == 0:
        # Nothing to rank. Without this guard the top-score lookup below
        # would fail on an empty record list.
        return []

    records = []
    for i, entry in enumerate(answer_list):
        label = entry[0]
        text = entry[1]
        keywords = keywordextract.keywordExtract(text)
        score = 0
        for key in all_key_set:
            if key == label:
                # A keyword matching the label itself is the strongest signal.
                score += 3
            elif key in keywords:
                score += 1
        records.append(((key_list[i] + "的" + label, text), score))

    # Keeping every record that ties for the best score, in input order, is
    # what a stable descending sort followed by a leading-run scan produces.
    max_score = max(record[1] for record in records)
    answer = []
    for shown, record_score in records:
        if record_score == max_score:
            answer.append(shown[0] + "为:" + shown[1])
    return answer
def ansUsage(question):
    """Answer a usage-type question from the knowledge graph.

    Non-support keywords of the question are matched against all node names
    with ``calPro``; for each keyword the best-scoring name above 0.5 is kept
    as a recognised node, and keywords without any such match are collected
    separately. If at least one node was recognised the answer comes from
    ``getUsage``; otherwise the non-support keywords are looked up inside the
    nodes via ``getExistNode`` / ``getUsageAttr``.

    NOTE(review): a later ``def ansUsage`` in this file rebinds the name, so
    this definition appears to be shadowed at import time.

    Args:
        question: the question text.

    Returns:
        The value produced by ``getUsage`` or ``getUsageAttr``, or a
        one-element list holding the "no answer" message.
    """
    # Load the graph, its relation nodes and the names of all nodes.
    graph, relnodes = preData()
    node_names = getAllNodes(graph, relnodes)

    # Extract the question's keywords and echo them for debugging.
    keywords = keywordextract.keywordExtract(question)
    support_words = getSupportWord()
    for _key in keywords:
        print(_key)

    keys = set()         # node names recognised from the keywords
    all_key_set = set()  # non-support keywords that matched no node
    for keyword in keywords:
        if if_support(support_words, keyword):
            continue
        candidates = []
        for name in node_names:
            similarity = calPro(keyword, name)
            if similarity > 0.5:
                candidates.append((name, similarity))
        if len(candidates) == 0:
            all_key_set.add(keyword)
            continue
        # Stable descending sort: the first best-scoring name wins.
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        keys.add(candidates[0][0])

    # A node was recognised: answer directly from it.
    if len(keys) != 0:
        return getUsage(graph, relnodes, keys, all_key_set)

    # No node recognised: search inside the nodes using the keywords that
    # are not support words.
    no_support_words = [
        keyword for keyword in keywords
        if not if_support(support_words, keyword)
    ]
    if len(no_support_words) == 0:
        return ["无法找到答案"]

    nodes = getExistNode(graph, relnodes, no_support_words)
    if len(nodes) != 0:
        return getUsageAttr(graph, relnodes, nodes, keywords)
    return ["无法找到答案"]
def ansUsage(question):
    """Answer a usage-type question from the knowledge graph.

    ``getKeySet`` splits the question's keywords into recognised node
    keywords and non-node keywords. When a node is recognised the answer is
    delegated to ``getUsage``; otherwise the keywords that are not support
    words are looked up inside the nodes with ``getExistNode`` and answered
    through ``getUsageAttr``.

    Args:
        question: the question text.

    Returns:
        The value produced by ``getUsage`` or ``getUsageAttr``, or a
        one-element list holding the "cannot answer yet" message.
    """
    # Load the graph, its relation nodes and the names of all nodes.
    graph, relnodes = preData()
    node_names = getAllNodes(graph, relnodes)

    # Extract the question's keywords and echo them for debugging.
    keywords = keywordextract.keywordExtract(question)
    support_words = getSupportWord()
    for _key in keywords:
        print(_key.encode("utf-8"))

    # Node keywords and non-node keywords; per the original comment neither
    # set contains support words.
    keys, all_key_set = getKeySet(keywords, node_names, question)

    # A node was recognised: answer directly from it.
    if len(keys) != 0:
        return getUsage(graph, relnodes, keys, all_key_set)

    # No node recognised: fall back to searching inside the nodes with the
    # keywords that are not support words.
    no_support_words = [
        keyword for keyword in keywords
        if not if_support(support_words, keyword)
    ]
    if len(no_support_words) == 0:
        return ["能力有限,暂时还回答不了"]

    nodes = getExistNode(graph, relnodes, no_support_words)
    if len(nodes) != 0:
        return getUsageAttr(graph, relnodes, nodes, keywords)
    return ["能力有限,暂时还回答不了"]
def ansContent(question):
    """Answer a content-type question from the knowledge graph.

    Non-support keywords are matched against all node names with ``calPro``
    (best-scoring name above 0.5 per keyword). If any node is recognised the
    answer comes from ``getContent``; otherwise the question is handled like
    a usage question via ``ansUsage``.

    Args:
        question: the question text.

    Returns:
        The value of ``getContent`` for the last recognised node visited, or
        the result of ``ansUsage(question)`` when no node is recognised.
    """
    # Load the graph, its relation nodes and the names of all nodes.
    graph, relnodes = preData()
    node_names = getAllNodes(graph, relnodes)

    # Extract the question's keywords and echo them for debugging.
    keywords = keywordextract.keywordExtract(question)
    for _key in keywords:
        print(_key)

    keys = set()         # node names recognised from the keywords
    all_key_set = set()  # non-support keywords that matched no node
    support_words = getSupportWord()
    for keyword in keywords:
        if if_support(support_words, keyword):
            continue
        candidates = []
        for name in node_names:
            similarity = calPro(keyword, name)
            if similarity > 0.5:
                candidates.append((name, similarity))
        if len(candidates) == 0:
            all_key_set.add(keyword)
            continue
        # Stable descending sort: the first best-scoring name wins.
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        keys.add(candidates[0][0])

    # No node recognised: treat it exactly like a usage question.
    if not len(keys) > 0:
        return ansUsage(question)

    contents = []
    for name in keys:
        # NOTE(review): each iteration replaces the previous result, so only
        # the content of the last node visited is returned -- confirm whether
        # the results were meant to be accumulated instead.
        contents = getContent(graph, relnodes, name, all_key_set)
    return contents
def ansDef(question):
    """Answer a definition-type question from the knowledge graph.

    ``getKeySet`` splits the question's keywords into recognised node
    keywords and non-node keywords. With at least one recognised node the
    answer comes from ``getDefinition``; otherwise the question is handled
    like a usage question via ``ansUsage``.

    Args:
        question: the question text.

    Returns:
        The result of ``getDefinition`` or of ``ansUsage(question)``.
    """
    # Load the graph, its relation nodes and the names of all nodes.
    graph, relnodes = preData()
    node_names = getAllNodes(graph, relnodes)

    # Extract the question's keywords and echo them for debugging.
    keywords = keywordextract.keywordExtract(question)
    for keyword in keywords:
        print(keyword.encode("utf-8"))

    # Node keywords and non-node keywords; per the original comment neither
    # set contains support words.
    keys, all_key_set = getKeySet(keywords, node_names, question)

    # No node recognised: treat it exactly like a usage question.
    if not len(keys) > 0:
        return ansUsage(question)

    return getDefinition(graph, relnodes, keys, all_key_set)
def ansComp(question):
    """Answer a comparison-type question about concepts in the graph.

    ``getKeySet`` splits the question's keywords into recognised node
    keywords (``keys``) and non-node keywords (``all_key_set``). The kind of
    comparison requested is encoded in ``mark``:
    1 = difference, 2 = common points, 4 = comparison (both), 8 = relation,
    0 = none of these words was found.

    Flow:
      * no node keyword: look for a node shared by all keywords via
        ``getMaxNode`` and compare inside it, else describe each keyword;
      * exactly one node keyword: return its definition preceded by an
        apology message;
      * two or more: compare directly related nodes according to ``mark``;
        if that yields nothing, look for a common parent node; if there is
        none, return the individual definitions.

    NOTE(review): a later ``def ansComp`` in this file rebinds the name, so
    this definition appears to be shadowed at import time.

    Args:
        question: the question text.

    Returns:
        A list of answer strings (possibly empty), or whatever
        ``getDefinition`` returns on the fallback paths.
    """
    # Load the graph, its relation nodes and the names of all nodes.
    graph, relnodes = preData()
    nodesNames = getAllNodes(graph, relnodes)
    # Extract the keywords of the question.
    keywords = keywordextract.keywordExtract(question)
    # Node keywords and non-node keywords; per the original comment neither
    # set contains support words.
    keys, all_key_set = getKeySet(keywords, nodesNames, question)

    # Bit-style flags: 0001 difference, 0010 common points,
    # 0100 comparison, 1000 relation. Only the first match is recorded.
    mark = 0
    if u'区别' in all_key_set:
        mark = 1
    elif u'相同点' in all_key_set:
        mark = 2
    elif u'比较' in all_key_set:
        mark = 4
    elif u'关系' in all_key_set:
        mark = 8

    # Debug output of both keyword sets.
    for item in keys:
        print(item)
    print('*********************')
    for item in all_key_set:
        print(item)

    comparision = []

    # Case 1: none of the keywords is a node name.
    if len(keys) == 0:
        if len(keywords) == 0:
            # No keywords at all: nothing to compare (previously an
            # IndexError on keywords[0]).
            return comparision
        # Intersect the candidate nodes of all keywords; keywords without
        # any candidate node do not narrow the intersection.
        interSet = set(getMaxNode(graph, relnodes, keywords[0]))
        for j in range(1, len(keywords)):
            curset = set(getMaxNode(graph, relnodes, keywords[j]))
            if len(curset) != 0:
                interSet &= curset
        if len(interSet) != 0:
            # The keywords share a node: compare them inside that node.
            node = list(interSet)[0]
            comparision.append(
                "它们都是" + str(node) + "中的概念。" +
                getComparison(graph, relnodes, str(node), keywords))
        else:
            # No shared node: describe every keyword on its own.
            for key in keywords:
                node = getMaxNode(graph, relnodes, key)
                if len(node) != 0:
                    node = node[0]
                    comparision.append(
                        getDefiniAttr(graph, relnodes, unicode(node), key))
        return comparision

    # Case 2: node keywords were found (ideally two, possibly more).
    length = len(keys)
    if length < 2:
        # A single concept cannot be compared; return its definition.
        comparision = getDefinition(graph, relnodes, keys, all_key_set)
        comparision.insert(0, '对不起,我只找到了一个概念,您可以说的我更加懂你。')
        return comparision

    keys = list(keys)
    for i in range(length):
        node = getNodeByname(unicode(keys[i]), graph)
        if node is None:
            continue
        # Keep only the later keys that are directly related to this node.
        relsAndNodes = node.get_Rela()
        adjoints = set([str(item[1]) for item in relsAndNodes])
        others = set([str(keys[j]) for j in range(i + 1, length)])
        others &= adjoints
        if mark == 1:
            comparision.append(getDifPoints(node, others, graph))
        elif mark == 2:
            comparision.append(getSamePoints(node, others, graph))
        elif mark == 4:
            comparision.append(getSamePoints(node, others, graph))
            comparision.append(getDifPoints(node, others, graph))
        elif mark == 8:
            comparision.append(getRela(node, others, graph))

    # At least one non-empty direct answer: done.
    if len(comparision) != comparision.count(''):
        return comparision

    # No direct relation produced an answer; drop the empty placeholders
    # and look at the parents of the nodes instead.
    comparision = [entry for entry in comparision if entry != '']
    nodes = []
    for item in keys:
        item = transfer(item)
        node = getNodeByname(unicode(item), graph)
        if node is None:
            continue
        if len(node.get_Parents()) != 0:
            nodes.append(node)

    if len(nodes) == 0:
        # No node has parents, hence no common parent (previously an
        # IndexError on nodes[0]); answer with the individual definitions.
        return getDefinition(graph, relnodes, keys, all_key_set)

    interSet = set(nodes[0].get_Parents())
    for j in range(1, len(nodes)):
        interSet &= set(nodes[j].get_Parents())

    # From here on the answer no longer distinguishes between common
    # points, differences and comparison.
    if len(interSet) != 0:
        # Only a single common parent is considered for now.
        parent = list(interSet)[0]
        strs = '它们都是' + str(parent) + "中的概念。"
        strs += getCommAttr(nodes)
        comparision.append(strs)
        return comparision

    # No common parent: answer with the individual definitions.
    return getDefinition(graph, relnodes, keys, all_key_set)
def ansComp(question):
    """Answer a comparison-type question about concepts in the graph.

    Non-support keywords are matched against all node names with ``calPro``
    (best-scoring name above 0.5 per keyword).

    Flow:
      * no node keyword: look for a node shared by all keywords via
        ``getMaxNode`` and compare inside it, else describe each keyword;
      * otherwise: for every recognised node, collect ``getRelations``
        against the later recognised nodes it is directly related to; if
        that yields nothing, look for a common parent node; if there is
        none, return the individual definitions.

    Args:
        question: the question text.

    Returns:
        A list of answer strings (possibly empty), or whatever
        ``getDefinition`` returns on the fallback paths.
    """
    # Load the graph, its relation nodes and the names of all nodes.
    graph, relnodes = preData()
    nodesNames = getAllNodes(graph, relnodes)
    # Extract the question's keywords and echo them for debugging.
    keywords = keywordextract.keywordExtract(question)
    for _key in keywords:
        print(_key)

    keys = set()         # node names recognised from the keywords
    all_key_set = set()  # non-support keywords that matched no node
    support_words = getSupportWord()
    for keyword in keywords:
        if not if_support(support_words, keyword):
            records = []
            for word in nodesNames:
                pro = calPro(keyword, word)
                if pro > 0.5:
                    records.append((word, pro))
            # Best match first.
            records = sorted(records, key=lambda recode: recode[1],
                             reverse=True)
            if len(records) != 0:
                keys.add(records[0][0])
            else:
                all_key_set.add(keyword)

    comparision = []

    # Case 1: none of the keywords is a node name.
    if len(keys) == 0:
        if len(keywords) == 0:
            # No keywords at all: nothing to compare (previously an
            # IndexError on keywords[0]).
            return comparision
        # Intersect the candidate nodes of all keywords; keywords without
        # any candidate node do not narrow the intersection.
        interSet = set(getMaxNode(graph, relnodes, keywords[0]))
        for j in range(1, len(keywords)):
            curset = set(getMaxNode(graph, relnodes, keywords[j]))
            if len(curset) != 0:
                interSet &= curset
        if len(interSet) != 0:
            # The keywords share a node: compare them inside that node.
            node = list(interSet)[0]
            comparision.append(
                "它们都是" + str(node) + "中的概念。" +
                getComparison(graph, relnodes, str(node), keywords))
        else:
            # No shared node: describe every keyword on its own.
            for key in keywords:
                node = getMaxNode(graph, relnodes, key)
                if len(node) != 0:
                    node = node[0]
                    comparision.append(
                        getDefiniAttr(graph, relnodes, unicode(node), key))
        return comparision

    # Case 2: node keywords were found (ideally two, possibly more).
    # First check whether the nodes are directly related.
    length = len(keys)
    keys = list(keys)
    for i in range(length):
        # Look up the i-th node. Always using keys[0] here compared only
        # the first node and ignored the rest.
        node = getNodeByname(keys[i], graph)
        if node is None:
            continue
        relsAndNodes = node.get_Rela()
        adjoints = set([str(item[1]) for item in relsAndNodes])
        others = set([str(keys[j]) for j in range(i + 1, length)])
        others &= adjoints
        comparision.append(getRelations(node, others))

    # At least one non-empty direct answer: done.
    if len(comparision) != comparision.count(''):
        return comparision

    # No direct relation produced an answer; drop the empty placeholders
    # and look at the parents of the nodes instead.
    comparision = [entry for entry in comparision if entry != '']
    nodes = []
    for item in keys:
        node = getNodeByname(item, graph)
        if node is None:
            continue
        if len(node.get_Parents()) != 0:
            nodes.append(node)

    if len(nodes) == 0:
        # No node has parents, hence no common parent (previously an
        # IndexError on nodes[0]); answer with the individual definitions.
        return getDefinition(graph, relnodes, keys, all_key_set)

    interSet = set(nodes[0].get_Parents())
    for j in range(1, len(nodes)):
        interSet &= set(nodes[j].get_Parents())

    if len(interSet) != 0:
        # Only a single common parent is considered for now.
        parent = list(interSet)[0]
        strs = '它们都是' + str(parent) + "中的概念。"
        strs += getCommAttr(nodes)
        comparision.append(strs)
        return comparision

    # No common parent: answer with the individual definitions.
    return getDefinition(graph, relnodes, keys, all_key_set)