def original_predict(dialogue_tuple):
    """Score *dialogue_tuple* against the "original" intent classifiers.

    Feature-engineers the dialogue, one-hot encodes it against the global
    ``original_all_features`` vocabulary, and averages the probabilities of
    the random-forest and logistic classifiers per intent.

    Returns a list of dicts ({intent, random, logistic, score}) sorted by
    descending score, with the numeric fields stringified for JSON transport.
    """
    debug("Original predict")
    target_features, target_feature_matrix = feature_engineering(dialogue_tuple)
    # One-hot encode each row against the global feature vocabulary.
    target_x = [[1 if f in tf else 0 for f in original_all_features]
                for tf in target_feature_matrix]
    random_result = original_random_clf.predict_proba(target_x)
    logistic_result = original_logistic_clf.predict_proba(target_x)
    # NOTE: the previous version also built an unused ``resp_dict``; removed.
    resp_list = [{
        "intent": original_unique_intents[ri],
        "random": random_result[0][ri],
        "logistic": logistic_result[0][ri],
        # Final score is the plain average of the two model probabilities.
        "score": (random_result[0][ri] + logistic_result[0][ri]) / 2,
    } for ri in range(len(logistic_result[0]))]
    # Highest score first (replaces sort-then-reverse).
    sorted_resp_list = sorted(resp_list, key=lambda k: k["score"], reverse=True)
    debug(sorted_resp_list)
    # Stringify numeric fields so the payload serialises cleanly downstream.
    for entry in sorted_resp_list:
        for key in ("random", "logistic", "score"):
            entry[key] = str(entry[key])
    return sorted_resp_list
def upload_memory(self):
    """Normalise ``self.memory`` into the canonical schema and persist it
    to the DynamoDB memories table."""
    # Upstream may hand us a JSON string or junk; coerce to a dict first.
    if isinstance(self.memory, str):
        self.memory = loads(self.memory)
    if not isinstance(self.memory, dict):
        self.memory = {}
    snapshot = self.memory.copy()
    query = snapshot.get('cquery', {})
    # Rebuild the memory with every key present; falsy values collapse to
    # the NONE sentinel / empty list, and refinements are de-duplicated.
    self.memory = dict(
        context=snapshot.get('context', NONE) or NONE,
        cquery=dict(
            keyword=query.get('keyword', NONE) or NONE,
            refinements=list(set(query.get('refinements', []) or [])),
            authors=query.get('authors', []) or [],
            date=query.get('date', []) or [],
        ),
        sender=int(self._sid),
    )
    # debug(self.memory, 'Before Sanitized Memory:')
    self.memory = clean_empty(self.memory)
    # debug(self.memory, 'After Sanitized Memory:')
    self._memories.put_item(Item=self.memory)
    debug('Updated memory on dynamodb.')
    debug(self.memory, 'DynamoDB memory:')
def receive(self, evt): message = Message(evt, self._model) # get back results context, results, cache = message.analyse() # save last result to dynamodb cache['sender'] = self._sid cache['datetime'] = int(results[-1].get('datetime', time.time())) cache['reply'] = results[-1] self.cache = cache.copy() del cache # for each result send api request for result in results: self.send(result) # print out the context and update memory = self._model.memory memory['context'] = context or memory['context'] del context debug(memory.get('context'), 'context') # update dynamodb memory self._model.upload_memory() # return 200 debug(evt, 'Finished process request from sender({}):'.format(self._sid)) return ('Offical Response.', 200)
def __init__(self, sid, rid):
    """Set up the handler for one sender/recipient pair.

    Looks up the page access token, university and special-case list for
    *rid*; when the recipient is unknown it logs a warning and bails out
    early (leaving ``_model`` / ``_sender_details`` unset).
    """
    self._sid = sid
    self._rid = rid
    (self._ACCESS_TOKEN, self._UNIVERSITY,
     self._Special_case_list) = get_setup(rid, sid)
    if not self._ACCESS_TOKEN:
        debug('Recipient ({}) does not exist.'.format(rid), 'WARNING')
        # BUG FIX: the original `return ('Done', 200)` raised
        # "TypeError: __init__() should return None, not 'tuple'".
        # A bare return performs the intended early exit.
        # NOTE(review): self._model stays unset on this path — callers
        # should check for that before use.
        return
    self._model = Model(rid, sid, self._UNIVERSITY)
    self._sender_details = get_user_details(self._ACCESS_TOKEN, sid)
def __init__(self, original, payload, model, template, results):
    """Initialise message state from the raw text and optional payload."""
    # get_payload returns a two-entry mapping; unpack its values in order.
    payload_info = model.get_payload(payload)
    self._actions, self._payload = payload_info.values()
    self._model = model
    self._template = template
    self._results = results
    self._original = original
    self.msg = original
    self.predictions = []
    self.general_slots = []
    # A payload-driven message carries no free text to analyse.
    if self._payload:
        self.msg = ''
        debug('Incoming payload: {}'.format(self._payload))
def search_result(self, resultobj):
    """Persist each search hit in *resultobj* to the results table,
    normalising missing fields to the NONE sentinel."""
    debug('creating new search result')
    for result in resultobj:
        sanitized = clean_empty(result.copy())
        item = dict(
            image=sanitized.get('image', NONE),
            subtitle=sanitized.get('subtitle', NONE),
            summary=sanitized.get('summary', NONE),
            title=sanitized.get('title', NONE),
            url=sanitized.get('url', NONE),
            # The table key is 'id' but the source field is 'uuid'.
            id=sanitized.get('uuid', NONE),
        )
        self._result.put_item(Item=item)
def predict(target_message):
    """Run both intent models over one message.

    Returns ``(label_result, original_result, stanford_responses)`` where
    the first two are ranked intent lists and the last is the raw NLP
    preprocessing output.
    """
    # debug(target_message)
    pre, stanford_responses = nlp_preprocess([target_message])
    first_sentence = stanford_responses[0]
    label_result = labels_predict(first_sentence, pre)
    original_result = original_predict(pre)
    # Log only the top three candidates from each model.
    debug(label_result[:3])
    debug(original_result[:3])
    return label_result, original_result, stanford_responses
# predict('I am looking for visting the law library this coming thursday and next tuesday. Can I')
def send(self, msg):
    """POST one message object to the Facebook Send API.

    On success the pending cache entry is written to history; on any
    non-OK response a ValueError carrying the API error text is raised.
    """
    msg['recipient'] = {'id': self._sid}
    msg = clean_empty(msg)
    debug(msg, 'Before send to Send API')
    res = requests.post(
        'https://graph.facebook.com/v2.6/me/messages',
        params={"access_token": self._ACCESS_TOKEN},
        data=json.dumps(msg),
        headers={'Content-type': 'application/json'})
    if res.status_code == requests.codes.ok:
        self._model.create_cache(self.cache)
    else:
        body = res.json()
        raise ValueError('Send API response error: {}'.format(
            body['error']['message']))
def image(self, action):
    """Build an image-attachment Send API object from *action*.

    Returns None (implicitly) when the action carries no 'text' field.
    """
    debug('Creating image send API object')
    if 'text' in action:
        scheme, *__ = purl(action.get('text'))
        # URL schemes get a 'url' payload; anything else is treated as an
        # already-uploaded attachment id.
        if scheme in ['http', 'https']:
            payload = {'url': action.get('text')}
        else:
            payload = {'attachment_id': action.get('text')}
        result = self.template.copy()
        result['message']['attachment'] = {
            'type': 'image',
            'payload': payload,
        }
        return result
def quick_reply(self, action):
    """Attach quick-reply buttons either to a fresh template (when the
    action has text) or, in place, to the most recent result.

    Returns the new object only in the has-text case; the in-place path
    returns None, matching how callers use it.
    """
    debug('Creating quick_reply send API object')
    if 'buttons' in action:
        has_text = 'text' in action
        if has_text:
            # NOTE(review): .copy() is shallow, so the nested 'message'
            # dict is shared with the template — confirm that is intended.
            result = self.template.copy()
            result['message']['text'] = action['text']
        else:
            # No text: augment the last queued result in place.
            result = self.results[-1]
        for button in action.get('buttons'):
            result['message']['quick_replies'].append({
                'content_type': 'text',
                'title': button[0],
                'payload': button[1],
            })
        if has_text:
            return result
def get_memory(self):
    """Return this sender's memory, lazily fetching it from DynamoDB on
    first access and normalising missing fields."""
    if not self.memory:
        record = self._memories.get_item(Key=dict(sender=self._sid))
        item = record.get('Item', {})
        cquery = item.get('cquery', {})
        self.memory = dict(
            context=item.get('context', NONE),
            cquery=dict(
                keyword=cquery.get('keyword', NONE),
                refinements=cquery.get('refinements', []),
                authors=cquery.get('authors', []),
                date=cquery.get('date', []),
            ),
        )
    debug(self.memory)
    return self.memory
def postback(self, action):
    """Build a button-template Send API object from *action*.

    When the action carries a 'postfix' key, the stored datetime context
    is appended to every button payload. Returns None (implicitly) when
    the action has no buttons.
    """
    debug('Creating postback send API object')
    if 'buttons' in action:
        result = self.template.copy()
        payload = dict(
            text=action.get('text'),
            template_type='button',
            buttons=[])
        result['message']['attachment'] = dict(type='template',
                                               payload=payload)
        for button in action.get('buttons'):
            if 'postfix' in action:
                # Tag the payload with the pending datetime context.
                button[1] += self.datetime
            payload['buttons'].append({
                'type': 'postback',
                'title': button[0],
                'payload': button[1],
            })
        return result
def start(self, refine=False):
    """Run the stored search caller and return its results.

    Filters media-type words out of the keyword tokens, invokes the caller
    (which fills *authors* and *date* in place), optionally trims trailing
    results when *refine* is set, and returns
    ``(authors, date, elems, self._caller[2])``.
    """
    authors = []
    date = []
    debug(self._caller[1])
    # Drop media-type words from the keyword tokens before querying.
    self._caller[1] = list(
        filter(lambda x: x.lower() not in ['video', 'book', 'journal'],
               self._caller[1]))
    debug(self._caller[1])
    elems = self._caller[0](' '.join(self._caller[1]), authors, date)
    # BUG FIX: the original `for elem in elems: elem = list(filter(...))`
    # only rebound the loop variable and had no effect, so the dead loop
    # was removed. NOTE(review): the likely intent was
    # `elems = [list(filter(None, e)) for e in elems]` — confirm before
    # restoring that behaviour.
    if refine:
        # Trim trailing results proportionally to the number of extra
        # refinement tokens, removing at least two.
        remove = max(len(self._caller[1][1:]) * 2, 2)
        elems = elems[:-remove]
    return authors, date, elems, self._caller[2]
def search(self, action):
    """Build a generic-template carousel Send API object from the search
    hits in ``action['elems']``.

    Each hit becomes one element with a View link plus Summary / Reserve
    postback buttons.
    """
    debug('Creating search send API object')
    elements = []
    result = {
        'message': {
            'attachment': {
                'type': 'template',
                'payload': {
                    'template_type': 'generic',
                    'image_aspect_ratio': 'square',
                    'elements': elements,
                },
            },
            'quick_replies': [],
        },
        'context': action.get('context'),
    }
    for elem in action.get('elems'):
        elements.append({
            'title': elem.get('title', 'LEXICA'),
            # Fall back to the action-level image when the hit has none.
            'image_url': elem.get('image', action.get('image')),
            'subtitle': elem.get('subtitle', 'LEXICA'),
            'buttons': [{
                'type': 'web_url',
                'url': elem.get('url'),
                'title': 'View'
            }, {
                'type': 'postback',
                'title': 'Get a Summary',
                'payload': 'SUMMARY_' + elem.get('uuid')
            }, {
                'type': 'postback',
                'title': 'Reserve a Copy',
                'payload': 'RESERVE_COPY'
            }],
        })
    return result
def labels_predict(sentence_tokens, dialogue_tuple):
    """Score a message against the "label" intent classifiers.

    Rewrites the tokens via the `sortedlist` substitution table (protecting
    the verb 'book' by pipe-splitting its characters so it survives the
    substitution pass), realigns the rewritten tokens with the POS tags in
    *dialogue_tuple*, then feature-engineers and scores with the
    random-forest and logistic models. Returns a list of
    {intent, random, logistic, score} dicts sorted best-first, with the
    numeric fields stringified.
    """
    debug("Label predict")
    actual_tokens = []
    for sent in sentence_tokens:
        for token in sent['tokens']:
            # Pipe-split verb 'book' ('b|o|o|k') so the substitution pass
            # below cannot rewrite it; pipes are stripped afterwards.
            if token['lemma'].lower() == 'book' and token['pos'].startswith('V'):
                actual_tokens.append('|'.join(list(token['originalText'].lower())))
            else:
                actual_tokens.append(token['originalText'].lower())
    # Pad with spaces so whole-word matches work at both ends.
    testing_question = ' ' + ' '.join(actual_tokens) + ' '
    # Apply the global substitution table (item[0] -> item[1]), whole words only.
    for item in sortedlist:
        if testing_question.find(' '+item[0]+' ') > -1:
            testing_question = testing_question.replace(' '+item[0]+' ', ' '+item[1]+' ')
    testing_question = testing_question.replace('|', '')
    testing_question_tokens = testing_question.split()
    # gc walks the flat rewritten-token list while ds/ts walk the nested
    # per-sentence structure; assumes both contain the same token count.
    gc = 0
    new_list_for_sentences = []
    for ds in range(len(dialogue_tuple[0])):
        new_list_for_sentence = []
        for ts in range(len(dialogue_tuple[0][ds])):
            pos = dialogue_tuple[0][ds][ts][0].split('_')[0]
            # NOTE(review): `word` is computed but never used.
            word = dialogue_tuple[0][ds][ts][0].split('_')[1]
            # Keep the original POS, swap in the rewritten token.
            new_list_for_sentence.append((pos + '_' + testing_question_tokens[gc],
                                          dialogue_tuple[0][ds][ts][1],
                                          dialogue_tuple[0][ds][ts][2]))
            gc += 1
        new_list_for_sentences.append(new_list_for_sentence)
    debug(new_list_for_sentences)
    target_features, target_feature_matrix = feature_engineering([new_list_for_sentences])
    # One-hot encode against the label feature vocabulary.
    target_x = []
    for tf in target_feature_matrix:
        tx = []
        for f in label_all_features:
            if f in tf:
                tx.append(1)
            else:
                tx.append(0)
        target_x.append(tx)
    random_result = label_random_clf.predict_proba(target_x)
    logistic_result = label_logistic_clf.predict_proba(target_x)
    # NOTE(review): resp_dict is populated but never read.
    resp_dict = {}
    resp_list = []
    for ri in range(len(logistic_result[0])):
        resp_dict[label_unique_intents[ri]] = (random_result[0][ri], logistic_result[0][ri])
        resp_list.append({"intent": label_unique_intents[ri],
                          "random": random_result[0][ri],
                          "logistic": logistic_result[0][ri],
                          # Score is the plain average of the two models.
                          "score": (random_result[0][ri] + logistic_result[0][ri]) / 2})
    sorted_resp_list = sorted(resp_list, key=lambda k: k["score"])
    sorted_resp_list.reverse()
    debug(sorted_resp_list)
    # Stringify numeric fields for JSON transport.
    for ri in range(len(sorted_resp_list)):
        sorted_resp_list[ri]['random'] = str(sorted_resp_list[ri]['random'])
        sorted_resp_list[ri]['logistic'] = str(sorted_resp_list[ri]['logistic'])
        sorted_resp_list[ri]['score'] = str(sorted_resp_list[ri]['score'])
    return sorted_resp_list
def parser(original): debug(original, 'Parse Original Message:') # message = spell_checker(original) # if message: message = [original] debug('Message After spell check: {}'.format(message)) tmpslot = dict( originalText=[], lemma=[], pos=[], slot_tag=[], ) try: slot_response = req_post('http://13.228.72.161:8080/tag', json={ "q": message }).json() general_slots = slot_response.get('result') or [('LEXICA_SLOT', ) * 4] debug('general_slots: {}'.format(general_slots)) (tmpslot['originalText'], tmpslot['lemma'], tmpslot['pos'], tmpslot['slot_tag']) = zip(*general_slots) except JSONDecodeError as err: debug('JSON error [{}]'.format(err), 'ERROR') message = ' '.join(message) predict_result = dict( zip(('label', 'original', 'stanford'), predict(message))) debug(predict_result, name='Predict Result:') # predict_result['stanford'] = predict_result.get('stanford')[0] label_score = float(predict_result.get('label')[0].get('score')) original_score = float(predict_result.get('original')[0].get('score')) if label_score > original_score: class_probability = predict_result.get('label') else: class_probability = predict_result.get('original') del label_score, original_score, predict_result words = list( filter( None, [word.strip() for word in sub(r'[^\w\s]', ' ', message).split()])) # create new words for BookSlot_MEMM prevent mutation of words book_slot = BookSlot_MEMM(words[:]) general_slots = list(map(list, zip(*list(tmpslot.values())))) book_keyword = '' for i, val in enumerate(words): if book_slot[i] == 'book': book_keyword += ' ' + val book_keyword = book_keyword.strip() if book_keyword.strip() else message debug(book_slot, name='book_slot: ') debug(book_keyword, name='book_keyword: ') debug(class_probability, name='class_probability: ') debug(general_slots, name='general_slots: ') debug(words, name='words: ') return message, book_keyword, class_probability, general_slots, words
def GeneralSlot_MEMM(wordList):
    """Viterbi-decode slot tags for *wordList* using the general MEMM.

    Pads the word list with sentinel start/end words (4 before, 3 after) so
    the 7-word feature window is always defined, runs a Viterbi pass over
    the tag set in ``bookname_list``, and returns the back-traced tag path.
    NOTE(review): mutates the caller's list in place; callers pass a copy.
    """
    start_word = "startttt"
    end_word = "endddd"
    # Pad so positions w-4..w+2 exist for every real word.
    for d in range(4):
        wordList.insert(0, start_word)
    for d in range(3):
        wordList.append(end_word)
    tRange = len(bookname_list)
    wRange = len(wordList)
    # Fixed 300x300 tables — assumes <=300 tags and <=300 words. TODO confirm.
    viterbi = [[0 for x in range(300)]
               for x in range(300)]  # store the highest probabilities value
    backpointer = [['' for x in range(300)] for x in range(300)
                   ]  # store tag that has the highest probabilities value
    # Decode over the real words only (padding excluded).
    for w in range(4, wRange - 2):
        maxViterbi = 0
        maxViterbiList = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        maxPreviousState = 0
        for t in range(tRange):
            for i in range(tRange):
                if w == 4:
                    # First real word: previous tag is fixed to bookname_list[0].
                    probability = general_maxent_classifier.prob_classify(
                        GeneralSlot_MEMM_features(
                            wordList[w - 4], wordList[w - 3], wordList[w - 2],
                            wordList[w - 1], wordList[w], wordList[w + 1],
                            wordList[w + 2], bookname_list[0]))
                    # +1 smoothing keeps products from vanishing to zero.
                    posterior = float(probability.prob(bookname_list[t])) + 1
                    if posterior > maxViterbiList[t]:
                        maxViterbiList[t] = posterior
                    if posterior > maxViterbi:
                        maxViterbi = posterior
                        maxPreviousState = i
                if w > 4:
                    # Condition on candidate previous tag bookname_list[i].
                    probability = general_maxent_classifier.prob_classify(
                        GeneralSlot_MEMM_features(
                            wordList[w - 4], wordList[w - 3], wordList[w - 2],
                            wordList[w - 1], wordList[w], wordList[w + 1],
                            wordList[w + 2], bookname_list[i]))
                    posterior = float(probability.prob(bookname_list[t])) + 1
                    if float(viterbi[i][w - 1]) * posterior > maxViterbi:
                        maxViterbi = float(viterbi[i][w - 1]) * posterior
                        maxPreviousState = i
                    if float(
                            viterbi[i][w - 1]) * posterior > maxViterbiList[t]:
                        maxViterbiList[t] = float(
                            viterbi[i][w - 1]) * posterior
            # /2 damps the +1-smoothed scores between columns.
            viterbi[t][w] = maxViterbiList[t] / 2
            backpointer[t][w] = bookname_list[maxPreviousState]
    # Back-trace the best path from the final column.
    maxPrevTag = bookname_list[maxPreviousState]
    path = [maxPrevTag]
    for i in range(wRange - 4, 4, -1):
        # debug(i-3)
        # debug(wordList[i])
        # debug(maxPrevTag)
        # debug("\n")
        maxPrevTag = backpointer[maxPreviousState][i]
        path.insert(0, maxPrevTag)
        maxPreviousState = bookname_list.index(maxPrevTag)
    debug(1)
    debug(wordList[4])
    debug(maxPrevTag)
    debug("\n")
    return path
def get_response(self, intent):
    """Fetch the canned response value for *intent* from the resources
    table; returns None when the item or its 'value' key is missing."""
    # Typo fix in the log label: 'geting' -> 'getting'.
    debug(intent, 'getting response for intent:')
    return self._resources.get_item(Key={
        'type': 'responses',
        'key': str(intent)
    }).get('Item', {}).get('value')
# Module imports and shared state for the MEMM slot-tagging models.
import nltk
from nltk.stem.porter import *
from nltk.classify import MaxentClassifier
import pickle
import os,sys
import re
from io import open
import string
import time
#from nltk.tag import StanfordNERTagger
from chatbot.helper import debug
# reload(sys)
# sys.setdefaultencoding('utf8')
debug("start program ", time.strftime('%X %x %Z'))
# from itertools import zip
from itertools import groupby
#bookname_list = ['not', 'LIBRARY', 'GRADUATE', 'FACILITY', 'DATE', 'MEMBERSHIP', 'PURPOSE', 'SUBJECT', 'LOCATION', 'UNIVERSITY', 'STUDENTTYPE']
# Accumulates (features, label) pairs while the model is built.
labeled_features = []
# testing_file = open("title_questsions_test.txt", "r")
# output_file = open("book_name_output.txt", "wb")
#****************************************************************building input features
#Comment out this section to remove universal label
# Tag set used by the Viterbi decoders; 'not' is the universal/background label.
bookname_list=['not']
def create_cache(self, cache):
    """Write one conversation-history record (sanitised) to DynamoDB."""
    summary = (
        'sender({sender}) datetime({datetime}) '
        'message({message}) original message({original_message})'
    ).format(**cache)
    debug(summary, 'create new cache')
    self._history.put_item(Item=clean_empty(cache))
def text(self, action):
    """Build a plain-text Send API object from *action*; returns None
    when the action carries no text."""
    debug('Creating text send API object')
    if 'text' not in action:
        return None
    result = self.template.copy()
    result['message']['text'] = action.get('text')
    return result