# Imports shared by the snippets below (emulate_util and make_sentence are
# project-local helpers not shown here).
import json
import operator
from collections import defaultdict

import nltk

import emulate_util


def response(msg):
    with open("relationships.json") as data_file:
        relationships = json.load(data_file)
    key_words = emulate_util.key_words(msg)
    final_response = ""
    final_highest_value = 0.0
    for word in key_words:
        if word in relationships:
            best_response = ""
            highest_value = 0.0
            total = 0.0
            # Find the strongest response word for this key word, and
            # normalize its score by the sum of all scores for the word.
            for candidate in relationships[word]:
                if relationships[word][candidate] > highest_value:
                    highest_value = relationships[word][candidate]
                    best_response = candidate
                total += relationships[word][candidate]
            if total > 0:
                highest_value = highest_value / total
            if highest_value > final_highest_value:
                final_highest_value = highest_value
                final_response = best_response
    return final_response
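For reference, here is the data shape this function appears to assume. Both the JSON layout and the key_words stand-in below are assumptions inferred from the calls above, not the project's actual definitions:

# Hypothetical shape of relationships.json, inferred from the lookups above:
# each key word maps candidate response words to accumulated scores.
# {
#     "weather": {"rain": 3.0, "sunny": 1.0},
#     "lunch":   {"pizza": 2.0}
# }

# Minimal stand-in for emulate_util.key_words (assumption: it returns a
# dict of lowercased significant words mapped to weights).
def key_words(sentence):
    stop_words = {"the", "a", "an", "is", "to", "and"}
    words = {}
    for word in sentence.lower().split():
        if word not in stop_words:
            words[word] = words.get(word, 0) + 1
    return words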
def generate_response(input, debug=0):
    output = ""
    with open("utils/emulate/relationships.json") as data_file:
        relationship_data = json.load(data_file)
    key_words = emulate_util.key_words(input)
    total_sentence_weight = sum(key_words.values()) * 1.0
    print input
    # Convert each weight to a percentage, then invert it so rarer words
    # in the sentence count for more (the result is a %).
    for key_word in key_words:
        key_words[key_word] = 1.0 - (key_words[key_word] / total_sentence_weight)
        print key_word + ": " + str(key_words[key_word])
    # These are the "result words" which come from our key words in the sentence.
    # The algorithm: key_word(% score) * (result_word(score) / occurance of result word)
    result_set = defaultdict(int)
    for key_word in key_words:
        if key_word in relationship_data:
            for result_word in relationship_data[key_word]:
                occurance = emulate_util.word_occurance(result_word) * 2
                word_score = 0
                if occurance > 0:
                    word_score = key_words[key_word] * (
                        relationship_data[key_word][result_word] / occurance)
                if word_score > 0.4:
                    result_set[result_word] += word_score
    # Keep only the five highest-scoring result words.
    final_words = dict(
        sorted(result_set.iteritems(), key=operator.itemgetter(1),
               reverse=True)[:5])
    print final_words
    return make_sentence(final_words.keys())
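To make the scoring comment concrete, here is a small worked example of key_word(% score) * (result_word(score) / occurance), using made-up numbers rather than real data:

# Suppose key_words = {"weather": 0.6} after the inverse-percentage step,
# relationship_data["weather"]["rain"] = 3.0, and word_occurance("rain") = 2.
occurance = 2 * 2                      # 4
word_score = 0.6 * (3.0 / occurance)   # 0.6 * 0.75 = 0.45
# 0.45 > 0.4, so "rain" enters result_set with score 0.45.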
            break  # we hit the last message!
        previous_message = message_data[index + 1]
        response_time = time - previous_message["created_at"]
        if (response_time > 50 or previous_message["text"] is None
                or text is None or message["user_id"] == 219313
                or previous_message["user_id"] == 219313 or "#" in text):
            # The next message might not have been tied to this one, or one
            # of the messages was blank; also ignore all messages responding
            # to sUN or from sUN.
            continue
        message_weight = {}
        previous_message_weight = {}
        # Find important words.
        message_weight_temp = emulate_util.key_words(text)
        previous_message_weight_temp = emulate_util.key_words(
            previous_message["text"])
        # Add type of speech (part-of-speech tags).
        message_weight_pos = nltk.pos_tag(nltk.word_tokenize(text))
        previous_message_weight_pos = nltk.pos_tag(
            nltk.word_tokenize(previous_message["text"]))
        # Re-key each weighted word as "word#POS" so the same word can carry
        # different relationships per part of speech.
        for word in message_weight_temp:
            for word_with_pos in message_weight_pos:
                if word == word_with_pos[0].lower():
                    message_weight[
                        word + "#" + word_with_pos[1]] = message_weight_temp[word]
        for word in previous_message_weight_temp:
            for word_with_pos in previous_message_weight_pos:
                if word == word_with_pos[0].lower():
                    previous_message_weight[
                        word + "#" + word_with_pos[1]] = previous_message_weight_temp[word]
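The fragment above only builds the two weight dicts; a plausible continuation (an assumption, since the rest of this loop isn't shown) is to cross every key word of the previous message with every key word of the reply and accumulate the product of their weights into the relationship table:

# Hypothetical continuation: link each "word#POS" key of the previous
# message to each key of the reply, weighting by both scores.
for prev_word in previous_message_weight:
    for word in message_weight:
        if prev_word not in relationships:
            relationships[prev_word] = {}
        relationships[prev_word][word] = (
            relationships[prev_word].get(word, 0)
            + previous_message_weight[prev_word] * message_weight[word])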
def generate_response(input, debug=0):
    output = ""
    with open("utils/emulate/relationships.json") as data_file:
        relationship_data = json.load(data_file)
    key_words_temp = emulate_util.key_words(input)
    key_words = {}
    total_sentence_weight = sum(key_words_temp.values()) * 1.0
    if len(key_words_temp) < 1:
        return "I didn't understand your dumbass message"
    if debug:
        output += "Input Keywords: "
    # Re-key each key word as "word#POS" to add its type of speech.
    key_word_pos = nltk.pos_tag(nltk.word_tokenize(input))
    for word in key_words_temp:
        for word_with_pos in key_word_pos:
            if word == word_with_pos[0].lower():
                key_words[word + "#" + word_with_pos[1]] = key_words_temp[word]
    # If there are multiple words, convert to % then invert (the result is a %).
    if len(key_words) > 1:
        for key_word in key_words:
            key_words[key_word] = 1.0 - (key_words[key_word] / total_sentence_weight)
            if debug:
                output += key_word + ": " + str(key_words[key_word]) + "\n"
    # These are the "result words" which come from our key words in the sentence.
    # The algorithm: key_word(% score) * (result_word(score) / occurance of result word)
    result_set = defaultdict(int)
    for key_word in key_words:
        if key_word in relationship_data:
            for result_word in relationship_data[key_word]:
                occurance = emulate_util.word_occurance(
                    result_word.split("#", 1)[0]) * 2
                word_score = 0
                if occurance > 0:
                    word_score = key_words[key_word] * (
                        relationship_data[key_word][result_word] / occurance)
                result_set[result_word] += word_score
    # Keep the five highest-scoring result words and strip their POS tags.
    sorted_words = dict(
        sorted(result_set.iteritems(), key=operator.itemgetter(1),
               reverse=True)[:5])
    final_words = []
    for word in sorted_words:
        final_words.append(word.split("#", 1)[0])
    if debug:
        output += "Response Keywords: " + str(final_words) + "\n\n"
    if len(final_words) < 1:
        output += "I don't understand..."
    else:
        output += make_sentence(final_words)
    return output
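A quick usage sketch, assuming NLTK's tokenizer and tagger models have been downloaded and relationships.json has been built by the training pass above; the example input is made up, and the reply depends entirely on the trained data:

# Example call; debug=1 prepends the input and response keywords.
print generate_response("what should we eat for lunch", debug=1)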