示例#1
0
def original_predict(dialogue_tuple):
	"""Score *dialogue_tuple* against every intent of the "original" models.

	Builds a binary feature vector over ``original_all_features``, runs both
	the random-forest and logistic classifiers, and returns a list of dicts
	``{"intent", "random", "logistic", "score"}`` sorted by descending
	average score, with all numeric fields stringified for serialisation.
	"""
	debug("Original predict")
	target_features, target_feature_matrix = feature_engineering(dialogue_tuple)
	# One-hot encode: 1 when the model feature appears in the sample features.
	target_x = [
		[1 if f in tf else 0 for f in original_all_features]
		for tf in target_feature_matrix
	]
	random_result = original_random_clf.predict_proba(target_x)
	logistic_result = original_logistic_clf.predict_proba(target_x)
	resp_list = []
	for ri in range(len(logistic_result[0])):
		rnd, log = random_result[0][ri], logistic_result[0][ri]
		resp_list.append({
			"intent": original_unique_intents[ri],
			"random": rnd,
			"logistic": log,
			"score": (rnd + log) / 2,
		})
	# Highest combined score first (was sorted() followed by reverse()).
	# The unused resp_dict built by the original has been removed.
	sorted_resp_list = sorted(resp_list, key=lambda k: k["score"], reverse=True)
	debug(sorted_resp_list)
	# Stringify floats so the result serialises cleanly downstream.
	for entry in sorted_resp_list:
		entry['random'] = str(entry['random'])
		entry['logistic'] = str(entry['logistic'])
		entry['score'] = str(entry['score'])
	return sorted_resp_list
示例#2
0
文件: model.py 项目: vc2309/NLP-Proj
    def upload_memory(self):
        """Normalise ``self.memory`` and persist it to the memories table."""
        # Memory may arrive as a JSON string; decode it, falling back to {}
        # when the decoded value is not a dict.
        if isinstance(self.memory, str):
            self.memory = loads(self.memory)
            if not isinstance(self.memory, dict):
                self.memory = {}

        snapshot = self.memory.copy()
        cquery = snapshot.get('cquery', {})

        # Rebuild the memory with every field defaulted, refinements
        # de-duplicated, and the sender id coerced to int.
        self.memory = {
            'context': snapshot.get('context', NONE) or NONE,
            'cquery': {
                'keyword': cquery.get('keyword', NONE) or NONE,
                'refinements': list(set(cquery.get('refinements', []) or [])),
                'authors': cquery.get('authors', []) or [],
                'date': cquery.get('date', []) or [],
            },
            'sender': int(self._sid),
        }

        # Strip empty values before writing to DynamoDB.
        self.memory = clean_empty(self.memory)

        self._memories.put_item(Item=self.memory)
        debug('Updated memory on dynamodb.')

        debug(self.memory, 'DynamoDB memory:')
示例#3
0
    def receive(self, evt):
        """Handle an incoming event end-to-end.

        Analyses the event, caches the final reply, sends every generated
        result through the Send API, updates the conversation context, and
        persists memory to DynamoDB.  Returns an (body, status) tuple.
        """
        message = Message(evt, self._model)
        # get back results
        context, results, cache = message.analyse()

        # Save the last result so send() can persist it after a successful
        # API call.
        cache['sender'] = self._sid
        cache['datetime'] = int(results[-1].get('datetime', time.time()))
        cache['reply'] = results[-1]
        self.cache = cache.copy()
        del cache

        # For each result, send an API request.
        for result in results:
            self.send(result)

        # Keep the previous context when analysis produced none.
        memory = self._model.memory
        memory['context'] = context or memory['context']
        del context
        debug(memory.get('context'), 'context')

        # Update dynamodb memory.
        self._model.upload_memory()
        debug(evt,
              'Finished process request from sender({}):'.format(self._sid))
        # Bug fix: corrected the response-body typo "Offical" -> "Official".
        return ('Official Response.', 200)
示例#4
0
    def __init__(self, sid, rid):
        """Set up the handler for one sender/recipient pair.

        Loads the recipient's access token and settings via ``get_setup``;
        when the recipient is unknown, logs a warning and leaves the model
        and sender details unset.
        """
        self._sid = sid
        self._rid = rid
        (self._ACCESS_TOKEN, self._UNIVERSITY,
         self._Special_case_list) = get_setup(rid, sid)

        if not self._ACCESS_TOKEN:
            debug('Recipient ({}) does not exist.'.format(rid), 'WARNING')
            # Bug fix: the original did `return ('Done', 200)` here, but
            # returning a non-None value from __init__ raises TypeError.
            # Bail out early without a value instead.
            return

        self._model = Model(rid, sid, self._UNIVERSITY)
        self._sender_details = get_user_details(self._ACCESS_TOKEN, sid)
示例#5
0
 def __init__(self, original, payload, model, template, results):
     """Initialise message state from the raw event and the model payload."""
     # NOTE(review): relies on get_payload() returning a dict whose first two
     # values (in insertion order) are actions and payload — confirm upstream.
     self._actions, self._payload = model.get_payload(payload).values()
     self._model = model
     self._template = template
     self._results = results
     self._original = original
     self.predictions = []
     self.general_slots = []
     # A payload-only event carries no free text to analyse.
     self.msg = '' if self._payload else original
     debug('Incoming payload: {}'.format(self._payload))
示例#6
0
文件: model.py 项目: vc2309/NLP-Proj
 def search_result(self, resultobj):
     """Persist each search hit to the results table with defaulted fields."""
     debug('creating new search result')
     for hit in resultobj:
         item = clean_empty(hit.copy())
         self._result.put_item(Item={
             'image': item.get('image', NONE),
             'subtitle': item.get('subtitle', NONE),
             'summary': item.get('summary', NONE),
             'title': item.get('title', NONE),
             'url': item.get('url', NONE),
             'id': item.get('uuid', NONE),
         })
示例#7
0
def predict(target_message):
	"""Run both intent classifiers over a single raw message.

	Returns (label_result, original_result, stanford_responses).
	"""
	preprocessed, stanford_responses = nlp_preprocess([target_message])

	# Only one message was passed in, so only the first Stanford response
	# is relevant to the label model.
	label_result = labels_predict(stanford_responses[0], preprocessed)
	original_result = original_predict(preprocessed)
	debug(label_result[:3])
	debug(original_result[:3])
	return label_result, original_result, stanford_responses


# predict('I am looking for visting the law library this coming thursday and next tuesday. Can I')
示例#8
0
    def send(self, msg):
        """Deliver *msg* to the current sender via the Facebook Send API.

        On success, persists the cached reply; on an API error, raises
        ValueError with the error message from the response body.
        """
        msg['recipient'] = {'id': self._sid}
        payload = clean_empty(msg)
        debug(payload, 'Before send to Send API')
        # NOTE(review): no timeout is set, so a stalled API call can hang
        # this handler indefinitely — consider requests.post(..., timeout=...).
        response = requests.post('https://graph.facebook.com/v2.6/me/messages',
                                 params={"access_token": self._ACCESS_TOKEN},
                                 data=json.dumps(payload),
                                 headers={'Content-type': 'application/json'})

        if response.status_code == requests.codes.ok:
            self._model.create_cache(self.cache)
        else:
            body = response.json()
            raise ValueError('Send API response error: {}'.format(
                body['error']['message']))
示例#9
0
 def image(self, action):
     """Build an image-attachment Send API message from *action*.

     Returns None when the action carries no 'text' field.
     """
     debug('Creating image send API object')
     if 'text' not in action:
         return None
     text = action.get('text')
     scheme, *_rest = purl(text)
     # http/https values are attached by URL; anything else is treated as
     # an already-uploaded attachment id.
     if scheme in ['http', 'https']:
         payload = {'url': text}
     else:
         payload = {'attachment_id': text}
     # NOTE(review): template.copy() is shallow, so the nested 'message'
     # dict is shared with self.template — confirm the template is rebuilt
     # per message.
     result = self.template.copy()
     result['message']['attachment'] = {'type': 'image', 'payload': payload}
     return result
示例#10
0
    def quick_reply(self, action):
        """Build (or extend in place) a quick-reply Send API message.

        With 'text' present, a fresh message is built and returned; without
        it, the quick replies are appended to the last existing result and
        None is returned (the caller already holds that result).
        """
        debug('Creating quick_reply send API object')
        if 'buttons' not in action:
            return None

        has_text = 'text' in action
        if has_text:
            # NOTE(review): .copy() is shallow — the nested 'message' dict is
            # shared with self.template; confirm the template is rebuilt per
            # message before relying on this.
            result = self.template.copy()
            result['message']['text'] = action['text']
        else:
            result = self.results[-1]

        for button in action.get('buttons'):
            result['message']['quick_replies'].append({
                'content_type': 'text',
                'title': button[0],
                'payload': button[1],
            })

        if has_text:
            return result
示例#11
0
文件: model.py 项目: vc2309/NLP-Proj
    def get_memory(self):
        """Lazily fetch this sender's memory from DynamoDB.

        Populates ``self.memory`` with defaulted fields on first call and
        returns the cached dict afterwards.
        """
        if not self.memory:
            fetched = self._memories.get_item(Key={'sender': self._sid})
            item = fetched.get('Item', {})
            cquery = item.get('cquery', {})

            self.memory = {
                'context': item.get('context', NONE),
                'cquery': {
                    'keyword': cquery.get('keyword', NONE),
                    'refinements': cquery.get('refinements', []),
                    'authors': cquery.get('authors', []),
                    'date': cquery.get('date', []),
                },
            }

        debug(self.memory)
        return self.memory
示例#12
0
 def postback(self, action):
     """Build a button-template Send API message from *action*.

     Returns None when the action carries no 'buttons' field.
     """
     debug('Creating postback send API object')
     if 'buttons' not in action:
         return None

     # NOTE(review): template.copy() is shallow — the nested 'message' dict
     # is shared with self.template; confirm the template is rebuilt per
     # message.
     result = self.template.copy()
     buttons = []
     result['message']['attachment'] = {
         'type': 'template',
         'payload': {
             'text': action.get('text'),
             'template_type': 'button',
             'buttons': buttons,
         },
     }
     for button in action.get('buttons'):
         # NOTE(review): this mutates the caller's button payload in place
         # when a 'postfix' is requested.
         if 'postfix' in action:
             button[1] += self.datetime
         buttons.append({
             'type': 'postback',
             'title': button[0],
             'payload': button[1],
         })
     return result
示例#13
0
文件: search.py 项目: vc2309/NLP-Proj
    def start(self, refine=False):
        """Execute the stored search callback and post-process its results.

        Filters media-type words out of the keyword list, runs the search
        callable, optionally trims trailing results when refining, and
        returns (authors, date, elems, context).
        """
        authors = []
        date = []
        debug(self._caller[1])
        # Drop media-type words from the keyword list before searching.
        self._caller[1] = list(
            filter(lambda x: x.lower() not in ['video', 'book', 'journal'],
                   self._caller[1]))
        debug(self._caller[1])
        elems = self._caller[0](' '.join(self._caller[1]), authors, date)

        # Bug fix (dead code removed): the original looped
        #   for elem in elems: elem = list(filter(lambda x: x, elem))
        # which only rebound the loop variable and never modified ``elems``
        # — a no-op.  If per-element filtering was actually intended, write
        # it back with a comprehension instead.

        if refine:
            # Trim at least two trailing elements, more when several
            # refinement keywords were supplied.
            remove = max(len(self._caller[1][1:]) * 2, 2)
            elems = elems[:-remove]

        return authors, date, elems, self._caller[2]
示例#14
0
    def search(self, action):
        """Build a generic-template Send API message from search results."""
        debug('Creating search send API object')
        elements = []
        result = {
            'message': {
                'attachment': {
                    'type': 'template',
                    'payload': {
                        'template_type': 'generic',
                        'image_aspect_ratio': 'square',
                        'elements': elements,
                    },
                },
                'quick_replies': [],
            },
            'context': action.get('context'),
        }

        for elem in action.get('elems'):
            buttons = [
                {
                    'type': 'web_url',
                    'url': elem.get('url'),
                    'title': 'View',
                },
                {
                    'type': 'postback',
                    'title': 'Get a Summary',
                    # NOTE(review): raises TypeError if 'uuid' is missing
                    # (None cannot be concatenated) — confirm every element
                    # carries a uuid.
                    'payload': 'SUMMARY_' + elem.get('uuid'),
                },
                {
                    'type': 'postback',
                    'title': 'Reserve a Copy',
                    'payload': 'RESERVE_COPY',
                },
            ]
            elements.append({
                'title': elem.get('title', 'LEXICA'),
                'image_url': elem.get('image', action.get('image')),
                'subtitle': elem.get('subtitle', 'LEXICA'),
                'buttons': buttons,
            })

        return result
示例#15
0
def labels_predict(sentence_tokens, dialogue_tuple):
	"""Predict intents after normalising 'book' verbs and known synonyms.

	Lowercases the Stanford tokens, protects verb uses of "book" from the
	synonym pass, rewrites the dialogue structure with the normalised text,
	then scores it with the label classifiers.  Returns a list of dicts
	``{"intent", "random", "logistic", "score"}`` sorted by descending
	average score, with numeric fields stringified.
	"""
	debug("Label predict")
	# Spell out verb uses of "book" letter-by-letter ("b|o|o|k") so the
	# whole-word synonym replacement below cannot rewrite them; the '|'
	# separators are stripped again afterwards.
	actual_tokens = []
	for sent in sentence_tokens:
		for token in sent['tokens']:
			text = token['originalText'].lower()
			if token['lemma'].lower() == 'book' and token['pos'].startswith('V'):
				text = '|'.join(text)
			actual_tokens.append(text)

	# Space padding makes every replacement a whole-word match.
	testing_question = ' ' + ' '.join(actual_tokens) + ' '
	for item in sortedlist:
		if testing_question.find(' ' + item[0] + ' ') > -1:
			testing_question = testing_question.replace(
				' ' + item[0] + ' ', ' ' + item[1] + ' ')
	testing_question = testing_question.replace('|', '')
	testing_question_tokens = testing_question.split()

	# Rebuild the dialogue structure with the normalised word text while
	# keeping each token's POS prefix and remaining fields.  (The original
	# also split out an unused `word` variable; removed.)
	gc = 0
	new_list_for_sentences = []
	for sentence in dialogue_tuple[0]:
		new_sentence = []
		for token in sentence:
			pos = token[0].split('_')[0]
			new_sentence.append(
				(pos + '_' + testing_question_tokens[gc], token[1], token[2]))
			gc += 1
		new_list_for_sentences.append(new_sentence)

	debug(new_list_for_sentences)
	target_features, target_feature_matrix = feature_engineering([new_list_for_sentences])
	# One-hot encode against the label model's feature vocabulary.
	target_x = [
		[1 if f in tf else 0 for f in label_all_features]
		for tf in target_feature_matrix
	]
	random_result = label_random_clf.predict_proba(target_x)
	logistic_result = label_logistic_clf.predict_proba(target_x)
	resp_list = []
	for ri in range(len(logistic_result[0])):
		rnd, log = random_result[0][ri], logistic_result[0][ri]
		resp_list.append({
			"intent": label_unique_intents[ri],
			"random": rnd,
			"logistic": log,
			"score": (rnd + log) / 2,
		})
	# Highest combined score first (was sorted() followed by reverse());
	# the unused resp_dict from the original has been removed.
	sorted_resp_list = sorted(resp_list, key=lambda k: k["score"], reverse=True)
	debug(sorted_resp_list)
	# Stringify floats so the result serialises cleanly downstream.
	for entry in sorted_resp_list:
		entry['random'] = str(entry['random'])
		entry['logistic'] = str(entry['logistic'])
		entry['score'] = str(entry['score'])
	return sorted_resp_list
示例#16
0
def parser(original):
    """Parse one raw user message into slots, keywords and intent scores.

    Returns (message, book_keyword, class_probability, general_slots, words).
    """
    debug(original, 'Parse Original Message:')
    # message = spell_checker(original)

    # if message:
    message = [original]

    debug('Message After spell check: {}'.format(message))
    # Column-wise slot store (one list per attribute); filled from the
    # external tagger response below, left empty on JSON failure.
    tmpslot = dict(
        originalText=[],
        lemma=[],
        pos=[],
        slot_tag=[],
    )

    try:
        # External slot-tagging service.
        slot_response = req_post('http://13.228.72.161:8080/tag',
                                 json={
                                     "q": message
                                 }).json()
        # Fall back to a single placeholder row when the tagger returns
        # nothing usable.
        general_slots = slot_response.get('result') or [('LEXICA_SLOT', ) * 4]
        debug('general_slots: {}'.format(general_slots))
        # Transpose row-per-token tuples into the column store.
        (tmpslot['originalText'], tmpslot['lemma'], tmpslot['pos'],
         tmpslot['slot_tag']) = zip(*general_slots)
    except JSONDecodeError as err:
        debug('JSON error [{}]'.format(err), 'ERROR')

    message = ' '.join(message)
    # Run both intent models; keys map to predict()'s three return values.
    predict_result = dict(
        zip(('label', 'original', 'stanford'), predict(message)))
    debug(predict_result, name='Predict Result:')
    # predict_result['stanford'] = predict_result.get('stanford')[0]

    # Both result lists are sorted best-first, so index 0 is each model's
    # top intent; keep whichever model is more confident.
    label_score = float(predict_result.get('label')[0].get('score'))
    original_score = float(predict_result.get('original')[0].get('score'))

    if label_score > original_score:
        class_probability = predict_result.get('label')
    else:
        class_probability = predict_result.get('original')

    del label_score, original_score, predict_result

    # Replace punctuation with spaces and split into clean word tokens.
    words = list(
        filter(
            None,
            [word.strip() for word in sub(r'[^\w\s]', ' ', message).split()]))

    # create new words for BookSlot_MEMM prevent mutation of words
    book_slot = BookSlot_MEMM(words[:])

    # Re-assemble per-token rows from the column store (inverse of the
    # zip(*...) transpose above).
    general_slots = list(map(list, zip(*list(tmpslot.values()))))
    book_keyword = ''
    for i, val in enumerate(words):
        # Collect the words whose MEMM slot tag is 'book'.
        if book_slot[i] == 'book':
            book_keyword += ' ' + val

    # Fall back to the whole message when no book words were found.
    book_keyword = book_keyword.strip() if book_keyword.strip() else message

    debug(book_slot, name='book_slot: ')
    debug(book_keyword, name='book_keyword: ')
    debug(class_probability, name='class_probability: ')
    debug(general_slots, name='general_slots: ')
    debug(words, name='words: ')

    return message, book_keyword, class_probability, general_slots, words
示例#17
0
文件: memm.py 项目: vc2309/NLP-Proj
def GeneralSlot_MEMM(wordList):
    """Viterbi-style decode of slot tags for *wordList* with the MaxEnt model.

    Pads the word list with start/end sentinels, scores every
    (previous tag, current tag) pair per word with
    ``general_maxent_classifier``, and back-traces the best tag path.
    Mutates *wordList* in place (callers pass a copy).

    NOTE(review): ``maxPreviousState`` is only (re)assigned inside the main
    loop, so an input with no real words (wRange - 2 <= 4) would raise
    UnboundLocalError; the fixed 300x300 tables also cap the supported
    sentence length — confirm inputs are always shorter.
    """

    start_word = "startttt"
    end_word = "endddd"

    # Four start pads (the feature window looks back 4 words) and three end
    # pads (it looks ahead 2).
    for d in range(4):
        wordList.insert(0, start_word)
    for d in range(3):
        wordList.append(end_word)

    tRange = len(bookname_list)  # number of candidate tags
    wRange = len(wordList)  # padded sentence length

    viterbi = [[0 for x in range(300)]
               for x in range(300)]  # store the highest probabilities value
    backpointer = [['' for x in range(300)] for x in range(300)
                   ]  # store tag that has the highest probabilities value

    # w indexes the current word, skipping the start pads and final end pads.
    for w in range(4, wRange - 2):
        maxViterbi = 0
        maxViterbiList = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        maxPreviousState = 0

        # t = candidate tag for the current word; i = candidate previous tag.
        for t in range(tRange):
            for i in range(tRange):

                if w == 4:
                    # First real word: no previous Viterbi column exists, so
                    # the previous-tag feature is fixed to bookname_list[0].
                    probability = general_maxent_classifier.prob_classify(
                        GeneralSlot_MEMM_features(
                            wordList[w - 4], wordList[w - 3], wordList[w - 2],
                            wordList[w - 1], wordList[w], wordList[w + 1],
                            wordList[w + 2], bookname_list[0]))
                    # +1 keeps every score > 1 so chained products never
                    # collapse toward zero.
                    posterior = float(probability.prob(bookname_list[t])) + 1

                    if posterior > maxViterbiList[t]:
                        maxViterbiList[t] = posterior

                    if posterior > maxViterbi:
                        maxViterbi = posterior
                        maxPreviousState = i

                if w > 4:
                    probability = general_maxent_classifier.prob_classify(
                        GeneralSlot_MEMM_features(
                            wordList[w - 4], wordList[w - 3], wordList[w - 2],
                            wordList[w - 1], wordList[w], wordList[w + 1],
                            wordList[w + 2], bookname_list[i]))
                    posterior = float(probability.prob(bookname_list[t])) + 1

                    # Combine the previous column's score with this step's
                    # posterior; track the best previous tag.
                    if float(viterbi[i][w - 1]) * posterior > maxViterbi:
                        maxViterbi = float(viterbi[i][w - 1]) * posterior
                        maxPreviousState = i

                    if float(
                            viterbi[i][w - 1]) * posterior > maxViterbiList[t]:
                        maxViterbiList[t] = float(
                            viterbi[i][w - 1]) * posterior

            # Halved before storing — presumably to keep the running product
            # bounded across long sentences; confirm the intent.
            viterbi[t][w] = maxViterbiList[t] / 2
            backpointer[t][w] = bookname_list[maxPreviousState]

    # Back-trace the best path from the last real word to the first.
    maxPrevTag = bookname_list[maxPreviousState]
    path = [maxPrevTag]

    for i in range(wRange - 4, 4, -1):
        # debug(i-3)
        # debug(wordList[i])
        # debug(maxPrevTag)
        # debug("\n")
        maxPrevTag = backpointer[maxPreviousState][i]
        path.insert(0, maxPrevTag)
        maxPreviousState = bookname_list.index(maxPrevTag)

    debug(1)
    debug(wordList[4])
    debug(maxPrevTag)
    debug("\n")

    return path
示例#18
0
文件: model.py 项目: vc2309/NLP-Proj
 def get_response(self, intent):
     """Look up the canned response text for *intent* in the resources table."""
     debug(intent, 'geting response for intent:')
     key = {'type': 'responses', 'key': str(intent)}
     item = self._resources.get_item(Key=key).get('Item', {})
     return item.get('value')
示例#19
0
import nltk
from nltk.stem.porter import *
from nltk.classify import MaxentClassifier
import pickle
import os
import sys
import re
from io import open
import string
import time
#from nltk.tag import StanfordNERTagger
from chatbot.helper import debug

# reload(sys)
# sys.setdefaultencoding('utf8')

# Bug fix: this call had a stray leading space, which is an IndentationError
# at module level.
debug("start program ", time.strftime('%X %x %Z'))

# from itertools import zip
from itertools import groupby

#bookname_list = ['not', 'LIBRARY', 'GRADUATE', 'FACILITY', 'DATE', 'MEMBERSHIP', 'PURPOSE', 'SUBJECT', 'LOCATION', 'UNIVERSITY', 'STUDENTTYPE']
# Feature rows accumulated while building the training data.
labeled_features = []


# testing_file = open("title_questsions_test.txt", "r")
# output_file = open("book_name_output.txt", "wb")

#****************************************************************building input features

# Comment out this section to remove universal label
bookname_list = ['not']
0
文件: model.py 项目: vc2309/NLP-Proj
 def create_cache(self, cache):
     """Record one processed message in the history table."""
     summary = (
         'sender({sender}) datetime({datetime}) '
         'message({message}) original message({original_message})'
     ).format(**cache)
     debug(summary, 'create new cache')
     self._history.put_item(Item=clean_empty(cache))
示例#21
0
 def text(self, action):
     """Build a plain-text Send API message from *action*.

     Returns None when the action carries no 'text' field.
     """
     debug('Creating text send API object')
     if 'text' not in action:
         return None
     # NOTE(review): template.copy() is shallow — the nested 'message' dict
     # is shared with self.template; confirm the template is rebuilt per
     # message.
     result = self.template.copy()
     result['message']['text'] = action.get('text')
     return result