def text():
    """Handle an incoming text-message request and pass it to TextProcess.

    Reads ``Body`` (the message) and ``From`` (the sender) from
    ``request.values``. A per-sender name is kept in ``textReg.json`` in the
    current working directory:

    * a message of the form ``switch <name> ...`` stores ``<name>`` for the
      sender;
    * a sender that has a stored name is answered as
      ``<name>@nathanp.me``;
    * a sender without one is registered as ``"treehacks"`` and answered
      with the fallback address literal below.

    Returns:
        ``'Incorrect POST data'`` when the message or the sender is missing
        or empty, otherwise ``'OK'`` after calling
        ``TextProcess.evalAndRespond(person, message, gamename)``.
    """
    print('Receiving text...')
    message = request.values.get('Body', None)
    person = request.values.get('From', None)

    # Validate before using the values: splitting a missing message would
    # raise AttributeError, and an invalid request must not rewrite the
    # registry (e.g. by storing an entry under a None sender).
    if not message or not person:
        return 'Incorrect POST data'

    jdata = {}
    if "textReg.json" in os.listdir("."):
        with open("textReg.json", "r") as jfile:
            jdata = json.load(jfile)

    words = message.split()
    if len(words) > 1 and words[0] == "switch":
        jdata[person] = words[1]

    if person in jdata:
        gamename = jdata[person] + "@nathanp.me"
    else:
        # NOTE(review): this fallback literal looks masked; confirm the
        # intended address.
        gamename = '*****@*****.**'
        jdata[person] = "treehacks"

    with open("textReg.json", "w") as jfile:
        json.dump(jdata, jfile)

    TextProcess.evalAndRespond(person, message, gamename)
    return 'OK'
def email():
    """Handle an incoming e-mail request and pass it to TextProcess.

    Reads ``to``, ``from`` and ``text`` from ``request.values``. A missing
    or empty ``text`` is replaced by an empty string.

    Returns:
        ``"Incorrect POST data"`` when ``to`` or ``from`` is missing or
        empty, otherwise ``'OK'`` after calling
        ``TextProcess.evalAndRespond(sender, body, recipient)``.
    """
    print('Receiving email...')
    recipient = request.values.get('to', None)
    sender = request.values.get('from', None)
    body = request.values.get('text', None)

    if not (recipient and sender):
        return "Incorrect POST data"

    TextProcess.evalAndRespond(sender, body or "", recipient)
    return 'OK'
def query(entity, attr):
    """Look up one attribute of an entity in its Baidu Baike info box.

    Fetches ``http://baike.baidu.com/item/<entity>`` through
    ``To.get_html_baidu``, extracts the element with class
    ``basic-info cmn-clearfix`` and converts it with ``get_info``. The
    attribute is looked up under ``attr.decode('utf8')``; when that key is
    absent, ``attr`` is replaced by the result of
    ``T.load_synonyms_word_inattr`` (synonym matching) and looked up again.

    Args:
        entity: entity name appended to the Baike item URL.
        attr: attribute name as a UTF-8 encoded byte string (it is decoded
            before the lookup).

    Returns:
        The info-box value for the attribute, or ``attr + "::找不到"`` when
        the page, the info box or the attribute cannot be found. After a
        failed synonym lookup the returned prefix is the synonym-mapped
        ``attr``.
    """
    soup = To.get_html_baidu("http://baike.baidu.com/item/" + entity)
    # Other callers of To.get_html_baidu check its result for None, so a
    # failed fetch is reported as "not found" rather than crashing on .find.
    if soup is None:
        return attr + "::找不到"

    basic_info_block = soup.find(class_='basic-info cmn-clearfix')
    if basic_info_block is None:
        return attr + "::找不到"

    info = get_info(basic_info_block)

    key = attr.decode('utf8')
    if key in info:
        return info[key]

    # No direct hit: fall back to synonym matching against the known
    # attribute names.
    attr_list = T.load_baikeattr_name('./resources/Attribute_name.txt')
    attr = T.load_synonyms_word_inattr(attr, './resources/SynonDic.txt', attr_list)
    key = attr.decode('utf8')
    if key in info:
        return info[key]
    return attr + "::找不到"
def kwquery(query):
    """Answer a question from Baidu and Bing search result pages.

    Steps, in order:

    1. Tag ``query`` with ``T.postag`` and keep the words whose flag
       contains ``"n"`` as keywords (the original comment says: keep nouns
       only).
    2. Fetch the Baidu result page and walk the elements with ids 1..9.
       For the first result only, try the direct-answer cards (knowledge
       graph, poetry, calculator, Baidu Zhidao best answer) and a result
       whose heading links to Baidu Zhidao. A hit returns immediately with
       a one-element list. Otherwise the result text is accumulated.
    3. Fetch the Bing result page. If it contains an element with class
       ``b_xlText b_emphText``, return its text as a one-element list;
       otherwise add the text of ``#b_results`` to the accumulated text.
    4. Split the accumulated text into sentences, keep those containing a
       keyword, count the words tagged ``"nr"`` (person names, per the
       original comment) and return up to three of the most frequent ones
       that are not keywords themselves.

    Progress messages are printed with single-argument ``print(...)``,
    which produces the same output as the former ``print "..."``
    statements.

    Args:
        query: the question text; passed to ``T.postag`` and ``quote``.

    Returns:
        A list of answer strings; it may be empty.
    """
    keywords = [w.word for w in T.postag(query) if "n" in w.flag]

    answer = []
    text = ''

    # Baidu: direct answers from the first result, summaries from the rest.
    # NOTE(review): the original comment says "top 10 summaries", but ids
    # 1..9 are visited; kept as is, confirm the intended range.
    soup_baidu = To.get_html_baidu('https://www.baidu.com/s?wd=' + quote(query))
    if soup_baidu is not None:
        for i in range(1, 10):
            results = soup_baidu.find(id=i)
            if results is None:
                print("百度摘要找不到答案")
                break

            if i == 1:
                # A "mu" attribute on the first result marks a special
                # answer card; a card answer is returned directly.
                if 'mu' in results.attrs:
                    direct = _baidu_card_answer(results)
                    if direct is not None:
                        answer.append(direct)
                        return answer

                # Ordinary result whose heading links to Baidu Zhidao.
                heading = results.find("h3")
                link = heading.find("a") if heading is not None else None
                if link is not None and u"百度知道" in link.get_text():
                    url = link.get('href')
                    if url is None:
                        print("百度知道图谱找不到答案")
                        continue
                    print("百度知道图谱找到答案")
                    zhidao_text = _zhidao_answer_text(url)
                    if zhidao_text is None:
                        # As before, a Zhidao result without a usable
                        # answer is skipped without adding its text.
                        continue
                    answer.append(zhidao_text.strip())
                    return answer

            text += results.get_text()

    # Bing: knowledge-graph answer first, result summaries otherwise.
    soup_bing = To.get_html_bing('https://www.bing.com/search?q=' + quote(query))
    bingbaike = None
    if soup_bing is not None:
        bingbaike = soup_bing.find(class_="b_xlText b_emphText")
    if bingbaike is not None:
        print("Bing知识图谱找到答案")
        answer.append(bingbaike.get_text())
        return answer

    print("Bing知识图谱找不到答案")
    if soup_bing is not None:
        bing_results = soup_bing.find(id="b_results")
        if bing_results is not None:
            text += bing_results.get_text()

    # Neither engine produced a direct answer: analyse the summaries.
    return _top_person_names(text, keywords)


def _baidu_card_answer(results):
    """Return the answer text of a first-result Baidu answer card, or None.

    Tries, in order: knowledge graph, poetry, calculator and Baidu Zhidao
    best answer, printing the same found / not-found messages as before.
    """
    # Knowledge graph.
    r = results.find(class_='op_exactqa_s_answer')
    if r is None:
        print("百度知识图谱找不到答案")
    else:
        print("百度知识图谱找到答案")
        return r.get_text().strip()

    # Classical poetry.
    r = results.find(class_="op_exactqa_detail_s_answer")
    if r is None:
        print("百度诗词找不到答案")
    else:
        print("百度诗词找到答案")
        return r.get_text().strip()

    # Calculator.
    if 'http://open.baidu.com/static/calculator/calculator.html' in results.attrs['mu']:
        try:
            r = results.find('div').find_all('td')[1].find_all('div')[1]
        except (AttributeError, IndexError):
            # The expected div/td/div layout is not present.
            r = None
        if r is None:
            print("计算器找不到答案")
        else:
            print("计算器找到答案")
            return r.get_text().strip()

    # Baidu Zhidao best answer.
    r = results.find(class_='op_best_answer_question_link')
    if r is None:
        print("百度知道图谱找不到答案")
        return None
    print("百度知道图谱找到答案")
    url = r.get('href')
    if url is None:
        return None
    # This path has always appended the text without stripping it.
    return _zhidao_answer_text(url)


def _zhidao_answer_text(url):
    """Return the raw text of the <pre> in a Zhidao page's 'bd answer' block, or None."""
    zhidao_soup = To.get_html_zhidao(url)
    if zhidao_soup is None:
        return None
    block = zhidao_soup.find(class_='bd answer')
    if block is None:
        return None
    pre = block.find('pre')
    if pre is None:
        return None
    return pre.get_text()


def _top_person_names(text, keywords, limit=3):
    """Return up to `limit` most frequent "nr"-tagged words in keyword sentences of `text`."""
    # Sentence splitting: a delimiter closes the current sentence. Text
    # after the last delimiter is not kept (unchanged behaviour).
    cutlist = [u"。", u"?", u".", u"_", u"-", u":", u"!", u"?"]
    sentences = []
    temp = ''
    for ch in text:
        if ch in cutlist:
            if temp != '':
                sentences.append(temp)
                temp = ''
        else:
            temp += ch

    # Keep only the sentences that mention at least one keyword.
    key_sentences = {}
    for s in sentences:
        if any(k in s for k in keywords):
            key_sentences[s] = 1

    # Count the words tagged "nr" in those sentences.
    target_list = {}
    for ks in key_sentences:
        for w in T.postag(ks):
            if w.flag == "nr":
                target_list[w.word] = target_list.get(w.word, 0) + 1

    # Highest count first; drop words that already appear in the question.
    ranked = sorted(target_list.items(), key=lambda item: item[1], reverse=True)
    candidates = [word for word, _count in ranked if word not in keywords]

    print("返回前n个词频")
    return candidates[:limit]
#coding:utf8 import aiml import os import TextProcess as T import Tools as QAT from QACrawler import baike from QACrawler import search_summary if __name__ == '__main__': #初始化jb分词器 T.jieba_initialize() #切换到语料库所在工作目录 mybot_path = './' os.chdir(mybot_path) mybot = aiml.Kernel() mybot.learn("./resources/std-startup.xml") mybot.respond('Load Doc Snake') #载入百科属性列表 print ''' .----------------. .-----------------. .----------------. .----------------. .----------------. | .--------------. || .--------------. || .--------------. || .--------------. || .--------------. | | | _______ | || | ____ _____ | || | __ | || | ___ ____ | || | _________ | | | | / ___ | | || ||_ \|_ _| | || | / \ | || | |_ ||_ _| | || | |_ ___ | | | | | | (__ \_| | || | | \ | | | || | / /\ \ | || | | |_/ / | || | | |_ \_| | | | | '.___`-. | || | | |\ \| | | || | / /__\ \ | || | | __'. | || | | _| _ | | | | |`\____) | | || | _| |_\ |_ | || | _/ / \ \_ | || | _| | \ \_ | || | _| |___/ | | | | | |_______.' | || ||_____|\____| | || ||____| |____|| || | |____||____| | || | |_________| | |