Пример #1
0
def query(entity, attr):
    """Look up one attribute of an entity on its Baidu Baike page.

    Fetches ``http://baike.baidu.com/item/<entity>`` through
    ``To.get_html_baidu``, locates the element whose class is
    ``basic-info cmn-clearfix`` and hands it to ``get_info``. The attribute
    is looked up under its own name first; if that fails, the name returned
    by ``T.load_synonyms_word_inattr`` is tried instead.

    Args:
        entity: Entity name, appended verbatim to the Baike item URL.
        attr: Attribute name to look up. A byte string is decoded as UTF-8
            for the lookup; a text string is used unchanged.

    Returns:
        The value ``get_info`` holds for the attribute (or for the name
        returned by the synonym lookup). When the page has no info box, or
        neither name is present, ``<name>::找不到`` is returned, where
        ``<name>`` is the last attribute name that was tried.
    """
    soup = To.get_html_baidu("http://baike.baidu.com/item/" + entity)
    basicInfo_block = soup.find(class_='basic-info cmn-clearfix')
    if basicInfo_block is None:
        return attr + "::找不到"

    # NOTE(review): presumably a dict keyed by text attribute names, since
    # the lookup key has always been decoded first -- confirm in get_info.
    info = get_info(basicInfo_block)

    key = _as_text(attr)
    if key in info:
        return info[key]

    # Synonym check: fall back to the name suggested by the synonym
    # dictionary shipped in the resources directory.
    resources_dir = (
        os.path.dirname(os.path.split(os.path.realpath(__file__))[0]) +
        '/resources/')
    attr_list = T.load_baikeattr_name(resources_dir + 'Attribute_name.txt')
    attr = T.load_synonyms_word_inattr(
        attr, resources_dir + 'SynonDic.txt', attr_list)

    key = _as_text(attr)
    if key in info:
        return info[key]
    return attr + "::找不到"


def _as_text(value):
    """Return *value* decoded from UTF-8 if it is a byte string, else as is."""
    if isinstance(value, bytes):
        return value.decode('utf8')
    return value
Пример #2
0
def query(entity, attr):
    """Look up one attribute of an entity on its Baidu Baike page.

    Fetches ``http://baike.baidu.com/item/<entity>`` through
    ``To.get_html_baidu``, locates the element whose class is
    ``basic-info cmn-clearfix`` and hands it to ``get_info``. The attribute
    is looked up under its own name first; if that fails, the name returned
    by ``T.load_synonyms_word_inattr`` is tried instead.

    Args:
        entity: Entity name, appended verbatim to the Baike item URL.
        attr: Attribute name to look up.

    Returns:
        ``info[attr]`` when the attribute (or the name returned by the
        synonym lookup) is present. Otherwise a multi-line trace string that
        starts with the queried URL and records each lookup that failed.
    """
    entity_uri = 'http://baike.baidu.com/item/' + entity
    result = '查询百科列表实体:' + entity_uri + '\n'
    soup = To.get_html_baidu(entity_uri)
    basicInfo_block = soup.find(class_='basic-info cmn-clearfix')
    # Identity test: ``== None`` would go through the element's own __eq__.
    if basicInfo_block is None:
        return result + entity + "-找不到\n"

    info = get_info(basicInfo_block)
    if attr in info:
        return info[attr]

    # Synonym check: record the miss, then retry with the name suggested by
    # the synonym dictionary shipped in the resources directory.
    result += '属性' + attr + '-找不到\n'
    resources_dir = (
        os.path.dirname(os.path.split(os.path.realpath(__file__))[0]) +
        '/resources/')
    attr_list = T.load_baikeattr_name(resources_dir + 'Attribute_name.txt')
    attr = T.load_synonyms_word_inattr(
        attr, resources_dir + 'SynonDic.txt', attr_list)
    if attr in info:
        return info[attr]
    return result + '同义属性' + attr + '-找不到\n'