Пример #1
0
def getBestAnswer(answer_list, all_key_set, key_list):
    """Return the formatted answer(s) with the highest keyword score.

    Each candidate is scored against ``all_key_set``: a keyword equal to the
    candidate's label (element 0) adds 3, a keyword found among the keywords
    extracted from the candidate's text (element 1) adds 1.

    Args:
        answer_list: sequence of candidates; ``candidate[0]`` is the label
            and ``candidate[1]`` is the answer text.
        all_key_set: iterable of keywords used for scoring.
        key_list: sequence parallel to ``answer_list``; ``key_list[i]`` is
            prefixed to the label of the i-th candidate.

    Returns:
        A list of strings, one per candidate tied for the top score, in the
        order the candidates appear in ``answer_list``. An empty list when
        ``answer_list`` is empty.
    """
    # Nothing to rank: previously this fell through to records[0] and
    # raised IndexError.
    if not answer_list:
        return []

    records = []
    for i, candidate in enumerate(answer_list):
        label = candidate[0]
        text = candidate[1]
        keywords = keywordextract.keywordExtract(text)

        score = 0
        for key in all_key_set:
            if key == label:
                score += 3  # an exact label hit outweighs a plain keyword hit
            elif key in keywords:
                score += 1

        records.append((key_list[i] + "的" + label, text, score))

    # Keeping every record that ties for the maximum, in input order, gives
    # the same result as a stable descending sort followed by a prefix scan.
    max_score = max(record[2] for record in records)
    return [
        title + "为:" + text
        for title, text, score in records
        if score == max_score
    ]
Пример #2
0
def ansUsage(question):
    """Answer a usage-style question.

    Keywords extracted from ``question`` are matched against the graph's
    node names with ``calPro`` (a match needs a score above 0.5). If any
    node is matched the answer comes from ``getUsage``; otherwise the
    non-support keywords are looked up inside nodes via ``getExistNode`` /
    ``getUsageAttr``.

    Returns:
        Whatever ``getUsage`` / ``getUsageAttr`` return, or a one-element
        list holding the "no answer" message.
    """
    graph, relnodes = preData()
    nodesNames = getAllNodes(graph, relnodes)
    keywords = keywordextract.keywordExtract(question)
    support_words = getSupportWord()

    # Debug output of the extracted keywords.
    for _key in keywords:
        print(_key)

    keys = set()         # node names matched by some keyword
    all_key_set = set()  # non-support keywords that matched no node

    for keyword in keywords:
        if if_support(support_words, keyword):
            continue

        candidates = []
        for node_name in nodesNames:
            similarity = calPro(keyword, node_name)
            if similarity > 0.5:
                candidates.append((node_name, similarity))

        if len(candidates) == 0:
            all_key_set.add(keyword)
            continue

        # Best match first; only the top candidate is kept.
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        keys.add(candidates[0][0])

    # At least one node was recognised: answer directly from the nodes.
    if len(keys) != 0:
        return getUsage(graph, relnodes, keys, all_key_set)

    # No node recognised: search inside the nodes using non-support words.
    no_support_words = [
        keyword for keyword in keywords
        if not if_support(support_words, keyword)
    ]
    if len(no_support_words) == 0:
        return ["无法找到答案"]

    nodes = getExistNode(graph, relnodes, no_support_words)
    if len(nodes) == 0:
        return ["无法找到答案"]
    return getUsageAttr(graph, relnodes, nodes, keywords)
Пример #3
0
def ansUsage(question):
    """Answer a usage-style question.

    ``getKeySet`` splits the extracted keywords into node keywords and
    non-node keywords. If any node keyword exists the answer comes from
    ``getUsage``; otherwise the non-support keywords are looked up inside
    nodes via ``getExistNode`` / ``getUsageAttr``.

    Returns:
        Whatever ``getUsage`` / ``getUsageAttr`` return, or a one-element
        list holding the fallback message.
    """
    graph, relnodes = preData()
    nodesNames = getAllNodes(graph, relnodes)
    keywords = keywordextract.keywordExtract(question)
    support_words = getSupportWord()

    # Debug output of the extracted keywords.
    for _key in keywords:
        print(_key.encode("utf-8"))

    keys, all_key_set = getKeySet(keywords, nodesNames, question)

    # At least one node was recognised: answer directly from the nodes.
    if len(keys) != 0:
        return getUsage(graph, relnodes, keys, all_key_set)

    # No node recognised: search inside the nodes using non-support words.
    no_support_words = [
        keyword for keyword in keywords
        if not if_support(support_words, keyword)
    ]
    if len(no_support_words) == 0:
        return ["能力有限,暂时还回答不了"]

    nodes = getExistNode(graph, relnodes, no_support_words)
    if len(nodes) == 0:
        return ["能力有限,暂时还回答不了"]
    return getUsageAttr(graph, relnodes, nodes, keywords)
Пример #4
0
def ansContent(question):
    """Answer a content-style question.

    Keywords extracted from ``question`` are matched against the graph's
    node names with ``calPro`` (a match needs a score above 0.5). When a
    node is matched the result of ``getContent`` is returned; otherwise the
    question is delegated to ``ansUsage``.
    """
    graph, relnodes = preData()
    nodesNames = getAllNodes(graph, relnodes)
    keywords = keywordextract.keywordExtract(question)

    # Debug output of the extracted keywords.
    for _key in keywords:
        print(_key)

    keys = set()         # node names matched by some keyword
    all_key_set = set()  # non-support keywords that matched no node
    support_words = getSupportWord()

    for keyword in keywords:
        if if_support(support_words, keyword):
            continue

        candidates = []
        for node_name in nodesNames:
            similarity = calPro(keyword, node_name)
            if similarity > 0.5:
                candidates.append((node_name, similarity))

        if len(candidates) == 0:
            all_key_set.add(keyword)
            continue

        # Best match first; only the top candidate is kept.
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        keys.add(candidates[0][0])

    # No node recognised: handle it like a usage question.
    if len(keys) == 0:
        return ansUsage(question)

    # NOTE(review): each iteration overwrites the previous result, so only
    # the content of the last iterated key is returned (and set iteration
    # order is arbitrary). Looks unintended - confirm getContent's return
    # type before changing this to accumulate.
    contents = []
    for name in keys:
        contents = getContent(graph, relnodes, name, all_key_set)
    return contents
Пример #5
0
def ansDef(question):
    """Answer a definition-style question.

    ``getKeySet`` splits the extracted keywords into node keywords and
    non-node keywords. With at least one node keyword the answer comes from
    ``getDefinition``; otherwise the question is delegated to ``ansUsage``.
    """
    graph, relnodes = preData()
    nodesNames = getAllNodes(graph, relnodes)

    keywords = keywordextract.keywordExtract(question)
    # Debug output of the extracted keywords.
    for word in keywords:
        print(word.encode("utf-8"))

    keys, all_key_set = getKeySet(keywords, nodesNames, question)

    # No node recognised: handle it like a usage question.
    if not len(keys) > 0:
        return ansUsage(question)

    return getDefinition(graph, relnodes, keys, all_key_set)
Пример #6
0
def ansComp(question):
    """Answer a comparison-style question.

    ``getKeySet`` splits the extracted keywords into node keywords
    (``keys``) and non-node keywords (``all_key_set``). The non-node
    keywords select the kind of comparison through ``mark``:
    1 = difference, 2 = common points, 4 = comparison (both), 8 = relation,
    0 = none of these words present.

    * No node keyword: look up the node each keyword belongs to with
      ``getMaxNode`` and compare inside a shared node, or fall back to the
      per-keyword definitions.
    * One node keyword: return its definition preceded by an apology.
    * Two or more: try the direct relations between the nodes first, then a
      shared parent node, then the individual definitions.

    Returns:
        A list of answer strings (possibly empty).
    """
    graph, relnodes = preData()
    nodesNames = getAllNodes(graph, relnodes)
    keywords = keywordextract.keywordExtract(question)

    keys, all_key_set = getKeySet(keywords, nodesNames, question)

    # Only the first matching cue word decides the comparison kind.
    mark = 0
    if u'区别' in all_key_set:
        mark = 1
    elif u'相同点' in all_key_set:
        mark = 2
    elif u'比较' in all_key_set:
        mark = 4
    elif u'关系' in all_key_set:
        mark = 8

    # Debug output of both keyword groups.
    for item in keys:
        print(item)
    print('*********************')
    for item in all_key_set:
        print(item)

    comparision = []

    # ---- Case 1: none of the keywords is a node name -------------------
    if len(keys) == 0:
        # Without any keyword there is nothing to compare; previously
        # keywords[0] raised IndexError here.
        if len(keywords) == 0:
            return comparision

        # Intersect the owning nodes of all keywords; keywords without an
        # owning node do not narrow the intersection.
        interSet = set(getMaxNode(graph, relnodes, keywords[0]))
        for key in keywords[1:]:
            curset = set(getMaxNode(graph, relnodes, key))
            if len(curset) != 0:
                interSet &= curset

        if len(interSet) != 0:
            # All keywords live in one common node: compare inside it.
            node = list(interSet)[0]
            comparision.append(
                "它们都是" + str(node) + "中的概念。" +
                getComparison(graph, relnodes, str(node), keywords))
        else:
            # No common node: answer with each keyword's own definition.
            for key in keywords:
                node = getMaxNode(graph, relnodes, key)
                if len(node) != 0:
                    node = node[0]
                    comparision.append(
                        getDefiniAttr(graph, relnodes, unicode(node), key))

        return comparision

    # ---- Case 2: at least one keyword is a node name -------------------
    length = len(keys)
    if length < 2:
        # A single concept cannot be compared; return its definition.
        comparision = getDefinition(graph, relnodes, keys, all_key_set)
        comparision.insert(0, '对不起,我只找到了一个概念,您可以说的我更加懂你。')
        return comparision

    keys = list(keys)
    for i, name in enumerate(keys):
        node = getNodeByname(unicode(name), graph)
        if node is None:
            continue

        # Keep only the later keys that are directly related to this node.
        relsAndNodes = node.get_Rela()
        adjoints = set([str(item[1]) for item in relsAndNodes])
        others = set([str(other) for other in keys[i + 1:]])
        others &= adjoints

        if mark == 1:
            comparision.append(getDifPoints(node, others, graph))
        elif mark == 2:
            comparision.append(getSamePoints(node, others, graph))
        elif mark == 4:
            comparision.append(getSamePoints(node, others, graph))
            comparision.append(getDifPoints(node, others, graph))
        elif mark == 8:
            comparision.append(getRela(node, others, graph))

    # Some non-empty answer was produced from the direct relations.
    if len(comparision) != comparision.count(''):
        return comparision

    # No direct relation gave an answer: drop the empty placeholders and
    # look for a parent node shared by all matched nodes.
    comparision = [entry for entry in comparision if entry != '']

    nodes = []
    for item in keys:
        item = transfer(item)
        node = getNodeByname(unicode(item), graph)
        if node is None:
            continue
        if len(node.get_Parents()) != 0:
            nodes.append(node)

    # With no parented node the intersection is empty; previously nodes[0]
    # raised IndexError here.
    interSet = set()
    if nodes:
        interSet = set(nodes[0].get_Parents())
        for node in nodes[1:]:
            interSet &= set(node.get_Parents())

    if len(interSet) != 0:
        # Only a single shared parent is considered for now.
        parent = list(interSet)[0]
        strs = '它们都是' + str(parent) + "中的概念。"
        strs += getCommAttr(nodes)
        comparision.append(strs)
        return comparision

    # No shared parent: answer with the individual definitions.
    return getDefinition(graph, relnodes, keys, all_key_set)
Пример #7
0
def ansComp(question):
    """Answer a comparison-style question.

    Keywords extracted from ``question`` are matched against the graph's
    node names with ``calPro`` (a match needs a score above 0.5).

    * No node matched: look up the node each keyword belongs to with
      ``getMaxNode`` and compare inside a shared node, or fall back to the
      per-keyword definitions.
    * Nodes matched: try the direct relations between the nodes first, then
      a shared parent node, then the individual definitions.

    Returns:
        A list of answer strings (possibly empty).
    """
    graph, relnodes = preData()
    nodesNames = getAllNodes(graph, relnodes)
    keywords = keywordextract.keywordExtract(question)

    # Debug output of the extracted keywords.
    for _key in keywords:
        print(_key)

    keys = set()         # node names matched by some keyword
    all_key_set = set()  # non-support keywords that matched no node
    support_words = getSupportWord()

    for keyword in keywords:
        if if_support(support_words, keyword):
            continue

        records = []
        for word in nodesNames:
            pro = calPro(keyword, word)
            if pro > 0.5:
                records.append((word, pro))

        # Best match first; only the top candidate is kept.
        records = sorted(records, key=lambda record: record[1], reverse=True)
        if len(records) != 0:
            keys.add(records[0][0])
        else:
            all_key_set.add(keyword)

    comparision = []

    # ---- Case 1: none of the keywords is a node name -------------------
    if len(keys) == 0:
        # Without any keyword there is nothing to compare; previously
        # keywords[0] raised IndexError here.
        if len(keywords) == 0:
            return comparision

        # Intersect the owning nodes of all keywords; keywords without an
        # owning node do not narrow the intersection.
        interSet = set(getMaxNode(graph, relnodes, keywords[0]))
        for key in keywords[1:]:
            curset = set(getMaxNode(graph, relnodes, key))
            if len(curset) != 0:
                interSet &= curset

        if len(interSet) != 0:
            # All keywords live in one common node: compare inside it.
            node = list(interSet)[0]
            comparision.append(
                "它们都是" + str(node) + "中的概念。" +
                getComparison(graph, relnodes, str(node), keywords))
        else:
            # No common node: answer with each keyword's own definition.
            for key in keywords:
                node = getMaxNode(graph, relnodes, key)
                if len(node) != 0:
                    node = node[0]
                    comparision.append(
                        getDefiniAttr(graph, relnodes, unicode(node), key))

        return comparision

    # ---- Case 2: at least one keyword is a node name -------------------
    keys = list(keys)
    for i, name in enumerate(keys):
        # Look up the i-th key. The previous code always looked up keys[0],
        # so every iteration inspected the same node.
        node = getNodeByname(name, graph)
        if node is None:
            continue

        # Keep only the later keys that are directly related to this node.
        relsAndNodes = node.get_Rela()
        adjoints = set([str(item[1]) for item in relsAndNodes])
        others = set([str(other) for other in keys[i + 1:]])
        others &= adjoints
        comparision.append(getRelations(node, others))

    # Some non-empty answer was produced from the direct relations.
    if len(comparision) != comparision.count(''):
        return comparision

    # No direct relation gave an answer: drop the empty placeholders and
    # look for a parent node shared by all matched nodes.
    comparision = [entry for entry in comparision if entry != '']

    nodes = []
    for item in keys:
        node = getNodeByname(item, graph)
        if node is None:
            continue
        if len(node.get_Parents()) != 0:
            nodes.append(node)

    # With no parented node the intersection is empty; previously nodes[0]
    # raised IndexError here.
    interSet = set()
    if nodes:
        interSet = set(nodes[0].get_Parents())
        for node in nodes[1:]:
            interSet &= set(node.get_Parents())

    if len(interSet) != 0:
        # Only a single shared parent is considered for now.
        parent = list(interSet)[0]
        strs = '它们都是' + str(parent) + "中的概念。"
        strs += getCommAttr(nodes)
        comparision.append(strs)
        return comparision

    # No shared parent: answer with the individual definitions.
    return getDefinition(graph, relnodes, keys, all_key_set)