def query(entity, attr):
    """Look up ``attr`` in the basic-info box of a Baidu Baike entry.

    The page ``http://baike.baidu.com/item/<entity>`` is fetched through
    ``To.get_html_baidu`` and its ``basic-info cmn-clearfix`` element is
    handed to ``get_info``. The attribute is first looked up under its own
    name; if that fails, it is mapped through the synonym dictionary and
    looked up once more.

    Args:
        entity: Entry name appended to the Baike item URL.
        attr: Attribute name to look up. May be a UTF-8 byte string (it is
            decoded before being used as a key) or already-decoded text.

    Returns:
        The value stored in the info box for the attribute, or the
        attribute name followed by the "::not found" marker (in Chinese)
        when the page has no info box or no matching key.
    """
    soup = To.get_html_baidu("http://baike.baidu.com/item/" + entity)
    basicInfo_block = soup.find(class_='basic-info cmn-clearfix')
    if basicInfo_block is None:
        return attr + "::找不到"

    # NOTE(review): get_info is assumed to return a dict keyed by decoded
    # (unicode) attribute names -- confirm against its definition.
    info = get_info(basicInfo_block)

    # Decode only byte strings so text input is accepted unchanged.
    key = attr.decode('utf8') if isinstance(attr, bytes) else attr
    if key in info:
        return info[key]

    # Synonym fallback: translate the requested attribute into the name
    # used by Baike, based on the bundled resource files.
    resources_dir = os.path.dirname(
        os.path.split(os.path.realpath(__file__))[0]) + '/resources'
    attr_list = T.load_baikeattr_name(resources_dir + '/Attribute_name.txt')
    attr = T.load_synonyms_word_inattr(
        attr, resources_dir + '/SynonDic.txt', attr_list)

    key = attr.decode('utf8') if isinstance(attr, bytes) else attr
    if key in info:
        return info[key]
    return attr + "::找不到"
def query(entity, attr):
    """Return the stripped info-box value of ``attr`` for a Baike entry.

    Fetches ``http://baike.baidu.com/item/<entity>`` via
    ``To.get_html_baike``, extracts the ``basic-info cmn-clearfix`` element
    with ``get_info`` and looks the attribute up, first under its given
    name and then under the name produced by the synonym dictionary.

    Args:
        entity: Entry name appended to the Baike item URL.
        attr: Attribute name to look up.

    Returns:
        The matching value with surrounding whitespace stripped, or the
        Chinese "not found" marker when there is no info box or no
        non-empty value for the attribute.
    """
    not_found = '找不到'

    page = To.get_html_baike("http://baike.baidu.com/item/" + entity)
    info_box = page.find(class_='basic-info cmn-clearfix')
    if not info_box:
        return not_found

    info = get_info(info_box)

    # Direct hit: a missing key and an empty value are both treated as a miss.
    value = info.get(attr)
    if value:
        return value.strip()

    # Fall back to the synonym dictionary shipped in the resources folder.
    resources_dir = os.path.dirname(
        os.path.split(os.path.realpath(__file__))[0]) + '/resources'
    known_attrs = T.load_baikeattr_name(resources_dir + '/Attribute_name.txt')
    attr = T.load_synonyms_word_inattr(
        attr, resources_dir + '/SynonDic.txt', known_attrs)

    value = info.get(attr)
    if value:
        return value.strip()
    return not_found