示例#1
0
def wordEmbedding(question):
    """Turn every corpus question into a ``[doc, sentiment]`` pair.

    Args:
        question: Indexable, sized collection of raw question strings
            (accessed as ``question[0] .. question[len(question) - 1]``).

    Returns:
        A list with one two-element list per question, in input order:
        element 0 is the result of running the loaded ``en_core_web_lg``
        pipeline on ``preprocess(question[i])``; element 1 is
        ``prep.findsenti(question[i])`` computed on the raw question.
    """
    # The pipeline is loaded once per call and reused for every question.
    pipeline = en_core_web_lg.load()

    # For each entry the document is built first, then the sentiment.
    return [
        [pipeline(preprocess(question[pos])), prep.findsenti(question[pos])]
        for pos in range(len(question))
    ]
def generate(intputSen, doc2, answer):
    """Pick the stored answer whose question best matches the input sentence.

    The input is preprocessed and run through the ``en_core_web_lg``
    pipeline, then compared with every stored question document. Questions
    scoring above the similarity threshold are candidates; the highest
    score wins. When the two best candidates have exactly the same score,
    the one whose stored sentiment value is closer to the input's
    (``prep.findsenti``) is chosen.

    Args:
        intputSen: Raw sentence typed by the user.
        doc2: Sequence of ``[question_doc, sentiment]`` entries, as built
            by ``wordEmbedding``.
        answer: Sequence of answers, indexed in parallel with ``doc2``.

    Returns:
        The selected answer, or a randomly chosen apology string when no
        stored question is similar enough. The reply is also printed.
    """
    nlp = en_core_web_lg.load()
    doc1 = nlp(preprocess(intputSen))
    inputsenti = prep.findsenti(intputSen)

    # This is the threshold: the higher it is, the closer the input must be
    # to a question in the corpus to count as a match.
    threshold = 0.60

    bestlist = []
    # An input without a usable vector cannot match anything, so the scan is
    # skipped entirely (the norm does not change between iterations).
    if doc1.vector_norm:
        for index, entry in enumerate(doc2):
            question_doc = entry[0]
            if not question_doc.vector_norm:
                # Nothing to compare against. Skipping here avoids reusing
                # the score left over from the previous question.
                continue
            similarity = doc1.similarity(question_doc)
            if similarity > threshold:
                bestlist.append([similarity, index, entry[1]])

    if not bestlist:
        # at least 5 different reasonable responses when the user enters
        # something outside the known topics
        listReply = [
            'Sorry your question is not included in my database',
            'Sorry, I do not know how to reply that',
            'Whoops! my brain is dead, may be next question',
            'Pass that bro, I cannot remember',
            'This question is too difficult, next question please',
            'Your question is hard for me, sorry about that'
        ]
        replyOutsideTopic = random.choice(listReply)
        print(replyOutsideTopic)
        return replyOutsideTopic

    # Ascending, stable sort: the best candidate is last, and among equal
    # scores the later corpus entry ends up last.
    sortedanswer = sorted(bestlist, key=operator.itemgetter(0))
    best = sortedanswer[-1]
    if len(sortedanswer) > 1:
        runner_up = sortedanswer[-2]
        # If the top 2 answers have the same similarity, check the sentiment
        # and prefer the runner-up only when it is strictly closer.
        if runner_up[0] == best[0] and (
                abs(best[2] - inputsenti) > abs(runner_up[2] - inputsenti)):
            best = runner_up

    reply = answer[best[1]]
    print(reply)
    return reply
示例#3
0
def generate(intputSen, doc2, answer):
    """Answer a user sentence from Wikipedia, the corpus, or Wolfram|Alpha.

    Resolution order:

    1. If the input contains ``'ask wikipedia about'``, the text after that
       phrase is looked up with ``wikipedia.summary`` and the (trimmed)
       summary or an error message is returned.
    2. Otherwise the input is compared with every stored question document;
       the most similar one above the threshold selects the answer. When
       the two best candidates have exactly the same score, the one whose
       stored sentiment value is closer to the input's wins.
    3. If nothing in the corpus matches, the Wolfram|Alpha query API is
       asked; if that fails for any reason a random apology is returned.

    Args:
        intputSen: Raw sentence typed by the user.
        doc2: Sequence of ``[question_doc, sentiment]`` entries.
        answer: Sequence of answers, indexed in parallel with ``doc2``.

    Returns:
        The reply string. Corpus answers and apology replies are also
        printed; Wikipedia and Wolfram|Alpha replies are only returned.
    """
    # get definition from Wikipedia
    wiki_marker = 'ask wikipedia about'
    if wiki_marker in intputSen:
        userinput = intputSen.split(wiki_marker)[1].strip()
        try:
            # Cap the message at 1024 characters (the original note ties
            # this limit to the socket buffer size).
            r = wikipedia.summary(userinput)[:1024]
            # Cut back to the last full stop so the reply does not end
            # mid-sentence; yields '' when there is no full stop at all.
            r = r[:r.rfind('.') + 1]
        except wikipedia.exceptions.DisambiguationError as e:
            r = str(e)
        except wikipedia.exceptions.PageError:
            r = f'{userinput} does not match any wikipedia pages. Try another question!'
        except Exception:
            # Any other failure becomes a generic reply; KeyboardInterrupt
            # and SystemExit are deliberately not swallowed.
            r = 'exception occurs, please ask another question'
        return r

    nlp = en_core_web_lg.load()
    doc1 = nlp(preprocess(intputSen))
    inputsenti = prep.findsenti(intputSen)

    # This is the threshold: the higher it is, the closer the input must be
    # to a question in the corpus to count as a match.
    threshold = 0.60

    bestlist = []
    # An input without a usable vector cannot match anything, so the scan is
    # skipped entirely (the norm does not change between iterations).
    if doc1.vector_norm:
        for index, entry in enumerate(doc2):
            question_doc = entry[0]
            if not question_doc.vector_norm:
                # Nothing to compare against. Skipping here avoids reusing
                # the score left over from the previous question.
                continue
            similarity = doc1.similarity(question_doc)
            if similarity > threshold:
                bestlist.append([similarity, index, entry[1]])

    # if we can not find answers from the given corpora, then use wolfram
    if not bestlist:
        try:
            # api usage see https://products.wolframalpha.com/api/documentation/
            params = {
                'input': intputSen,
                'appid': WOLFRAM_APP_ID,
                'output': 'json'
            }
            # The timeout keeps an unresponsive service from blocking the
            # caller forever; a timeout falls through to the canned reply.
            r = requests.get(url='http://api.wolframalpha.com/v2/query',
                             params=params,
                             timeout=10)
            j = json.loads(r.content)
            return j['queryresult']['pods'][1]["subpods"][0]["plaintext"]
        except Exception:
            # Network errors, bad JSON or an unexpected response layout all
            # end up here.
            # at least 5 different reasonable responses when the user enters
            # something outside the known topics
            listReply = [
                'Sorry your question is not included in my database',
                'Sorry, I do not know how to reply that',
                'Whoops! my brain is dead, may be next question',
                'Pass that bro, I cannot remember',
                'This question is too difficult, next question please',
                'Your question is hard for me, sorry about that'
            ]
            replyOutsideTopic = random.choice(listReply)
            print(replyOutsideTopic)
            return replyOutsideTopic

    # Ascending, stable sort: the best candidate is last, and among equal
    # scores the later corpus entry ends up last.
    sortedanswer = sorted(bestlist, key=operator.itemgetter(0))
    best = sortedanswer[-1]
    if len(sortedanswer) > 1:
        runner_up = sortedanswer[-2]
        # If the top 2 answers have the same similarity, check the sentiment
        # and prefer the runner-up only when it is strictly closer.
        if runner_up[0] == best[0] and (
                abs(best[2] - inputsenti) > abs(runner_up[2] - inputsenti)):
            best = runner_up

    reply = answer[best[1]]
    print(reply)
    return reply