Пример #1
0
    def printInfo(event):
        """Answer the question typed into ``text1`` and show the reply in ``text2``.

        Written as a callback taking an ``event`` argument, which is not used.
        ``text1`` and ``text2`` are widgets defined outside this function
        (presumably tkinter ``Text`` widgets -- confirm against the caller).

        Steps:
          1. Read the question from ``text1`` and clear the previous reply
             in ``text2``.
          2. Build a TF-IDF ``SentenceSimilarity`` model over the questions
             returned by ``read_corpus1()``.
          3. Look up the 5 most similar questions, display the answer that
             belongs to the best match, clear ``text1`` and pass the same
             reply string to ``syn``.

        If the input box contains only whitespace, both widgets are cleared
        and nothing else happens.
        """
        # Text.get(..., END) also returns the newline the widget keeps at the
        # end of its content; strip it so it does not become part of the query.
        question = text1.get('1.0', END).strip()
        text2.delete(1.0, END)

        if not question:
            # Nothing to look up: skip the costly model build and avoid
            # showing an arbitrary answer for an empty query.
            text1.delete(1.0, END)
            return

        # NOTE(review): the dictionary, corpus and model are rebuilt on every
        # call; consider building them once outside the handler.
        seg = Seg()
        seg.load_userdict('../userdict/userdict.txt')
        # Load the data (the keyword lists are not needed here).
        _, questionList, answerList = read_corpus1()
        # Initialise the model.
        ss = SentenceSimilarity(seg)
        ss.set_sentences(questionList)
        ss.TfidfModel()  # TF-IDF model

        question_k = ss.similarity_k(question, 5)
        # question_k[0][0] is used as the index of the best match in answerList.
        reply = ": {}".format(answerList[question_k[0][0]])
        text2.insert("insert", reply)
        # Clear the input box for the next question.
        text1.delete(1.0, END)
        syn(reply)
Пример #2
0
def main(question, top_k, task='faq'):
    """Find the stored questions most similar to ``question``.

    Args:
        question: The user's question text.
        top_k: Passed to ``SentenceSimilarity.similarity_k`` as the number
            of matches to request.
        task: ``'chat'`` loads the corpus with ``read_corpus2()``; any other
            value (default ``'faq'``) uses ``read_corpus1()``.

    Returns:
        A tuple ``(question_k, questionList_s, answerList_s)``: the result of
        ``similarity_k`` plus the keyword-filtered question and answer lists
        the model was built on.
    """
    # Load the data with the loader that matches the task.
    load_corpus = read_corpus2 if task == 'chat' else read_corpus1
    keyword_lists, all_questions, all_answers = load_corpus()

    # Simple inverted index: build the table, then segment the input question.
    inverted_table = invert_idxTable(keyword_lists)
    query_keywords = seg.cut(question)

    # Use keyword matching to narrow the corpus down to candidate questions.
    candidates = filter_questionByInvertTab(
        query_keywords, all_questions, all_answers, inverted_table)
    candidate_questions, candidate_answers = candidates

    # Initialise the model on the candidates only.
    similarity = SentenceSimilarity(seg)
    similarity.set_sentences(candidate_questions)
    similarity.TfidfModel()  # TF-IDF model
    # similarity.LsiModel()  # LSI model
    # similarity.LdaModel()  # LDA model

    best_matches = similarity.similarity_k(question, top_k)
    return best_matches, candidate_questions, candidate_answers
Пример #3
0
    # Interactive console loop: build a TF-IDF similarity model over the
    # corpus questions once, then answer questions typed by the user.
    # NOTE(review): `seg` is created outside the visible lines -- confirm.
    seg.load_userdict('./userdict/userdict.txt')
    # Load the data
    List_kw, questionList, answerList = read_corpus()
    # Initialise the model
    ss = SentenceSimilarity(seg)
    ss.set_sentences(questionList)
    ss.TfidfModel()         # TF-IDF model
    # ss.LsiModel()         # LSI model
    # ss.LdaModel()         # LDA model

    while True:
        # Prompt for a question; entering 'q' leaves the loop.
        question = input("请输入问题(q退出): ")
        if question == 'q':
            break
        time1 = time.time()
        # Request the 5 most similar stored questions.
        question_k = ss.similarity_k(question, 5)
        # question_k[0][0] is used as the index of the best match in answerList.
        print("亲,我们给您找到的答案是: {}".format(answerList[question_k[0][0]]))
        # zip(*question_k) yields (idx, score) pairs, so question_k is
        # presumably a pair of parallel sequences (indices, scores) -- confirm.
        for idx, score in zip(*question_k):
            print("same questions: {},                score: {}".format(questionList[idx], score))
        # Report how long the lookup and printing took.
        time2 = time.time()
        cost = time2 - time1
        print('Time cost: {} s'.format(cost))