示例#1
0
文件: seo.py 项目: e3gs/flask-boot
def _parse_count(texts):
    """Return the first text node as an int, or 0 when it is missing or not all digits."""
    value = texts[0].strip() if texts else ''
    return int(value) if value.isdigit() else 0


def analyze_keyword(k):
    """
    Analyze a site-level keyword: fetch its Baidu index and its related
    long-tail keywords from 5118.com.

    Nothing is fetched (a message is printed instead) when no ``Keyword``
    named ``k`` exists, or when the stored keyword already has a positive
    ``baiduIndex`` or ``baiduResult``.

    For every result row on the page:

    * the row whose title equals the keyword's name updates and saves the
      keyword itself;
    * any other row is saved as a new long-tail keyword whose ``parentId``
      is the keyword's ``_id``, unless a keyword with that name already
      exists.

    Rows without a title are skipped, and a missing or non-numeric count is
    stored as 0, so one odd row does not abort the run.

    :param k: name of the keyword to analyze.
    :return: None.
    """
    keyword = Keyword.find_one({'name': k})
    if not keyword:
        print('Keyword %s does not exist' % k)
        return
    if keyword.baiduIndex > 0 or keyword.baiduResult > 0:
        print('Keyword %s is imported before' % k)
        return
    print('Try to analyze keyword %s/%s' % (keyword._id, k))

    ss.headers['Referer'] = 'http://www.5118.com/'
    t = ss.get('http://www.5118.com/seo/words/%s' % url_quote(k)).text
    tree = html.fromstring(t)
    dls = tree.xpath('//div[@class="Fn-ui-list dig-list"]/dl')
    total = len(dls)
    for dl in dls:
        if dl.get('class', '') == 'dl-word':
            continue

        # xpath() yields an empty list when nothing matches; indexing it
        # blindly would raise IndexError and abort the whole run.
        titles = dl.xpath('./dd[1]//a[1]/@title')
        if not titles:
            print('Skip a row without keyword title')
            continue
        name = unicode(titles[0].strip())
        baidu_index = _parse_count(dl.xpath('./dd[2]/text()'))
        baidu_result = _parse_count(dl.xpath('./dd[3]/text()'))
        print('Found keyword: %s/%s/%s' % (name, baidu_index, baidu_result))

        # Compare with the stored name rather than the raw argument: under
        # Python 2 a non-ASCII byte-string ``k`` never equals a unicode title.
        # NOTE(review): assumes the model returns ``name`` as unicode - confirm.
        if name == keyword.name:
            keyword.baiduIndex = baidu_index
            keyword.baiduResult = baidu_result
            if total > 2:
                # NOTE(review): the 2 presumably discounts the header row and
                # the keyword's own row - confirm against the page layout.
                keyword.total = total - 2
            keyword.save()
            continue

        if Keyword.count({'name': name}) > 0:
            print('This keyword already exists')
            continue
        long_tail = Keyword()
        long_tail.name = name
        long_tail.level = KeywordLevel.LONG_TAIL
        long_tail.parentId = keyword._id
        long_tail.baiduIndex = baidu_index
        long_tail.baiduResult = baidu_result
        long_tail.save()

    # Pause 5-15 seconds before returning (presumably to throttle requests
    # when several keywords are analyzed in a row).
    time.sleep(random.randint(5, 15))
示例#2
0
def _parse_count(texts):
    """Return the first text node as an int, or 0 when it is missing or not all digits."""
    value = texts[0].strip() if texts else ''
    return int(value) if value.isdigit() else 0


def analyze_keyword(app, keyword):
    """
    Analyze a site-level keyword: fetch its Baidu index and its related
    long-tail keywords. The data is currently scraped from 5118.com.

    For every result row on the page:

    * the row whose title equals ``keyword.name`` updates and saves
      ``keyword`` itself;
    * any other row creates a long-tail keyword (``parentId`` set to
      ``keyword._id``) or, when a keyword with that name already exists,
      refreshes its Baidu index and result count.

    Rows without a title are skipped, and a missing or non-numeric count is
    stored as 0, so one odd row does not abort the run.

    :param app: object whose ``logger`` receives progress messages
        (assumed to behave like a standard ``logging.Logger``).
    :param keyword: the keyword record to analyze; its ``_id`` and ``name``
        are read, and it is updated in place when its own row is found.
    :return: None.
    """
    app.logger.info('Try to analyze keyword %s/%s', keyword._id, keyword.name)

    ss.headers['Referer'] = 'http://www.5118.com/'
    t = ss.get('http://www.5118.com/seo/words/%s' % url_quote(keyword.name)).text
    tree = html.fromstring(t)
    dls = tree.xpath('//div[@class="Fn-ui-list dig-list"]/dl')
    total = len(dls)
    for dl in dls:
        if dl.get('class', '') == 'dl-word':
            continue

        # xpath() yields an empty list when nothing matches; indexing it
        # blindly would raise IndexError and abort the whole run.
        titles = dl.xpath('./dd[1]//a[1]/@title')
        if not titles:
            app.logger.warning('Skip a row without keyword title')
            continue
        name = unicode(titles[0].strip())
        baidu_index = _parse_count(dl.xpath('./dd[2]/text()'))
        baidu_result = _parse_count(dl.xpath('./dd[3]/text()'))
        app.logger.info('Found keyword: %s/%s/%s', name, baidu_index, baidu_result)

        if name == keyword.name:
            keyword.baiduIndex = baidu_index
            keyword.baiduResult = baidu_result
            if total > 2:
                # NOTE(review): the 2 presumably discounts the header row and
                # the keyword's own row - confirm against the page layout.
                keyword.total = total - 2
            keyword.save()
            continue

        long_tail = Keyword.find_one({'name': name})
        if not long_tail:
            long_tail = Keyword()
            long_tail.name = name
            long_tail.level = KeywordLevel.LONG_TAIL
            long_tail.parentId = keyword._id

        # Existing and new long-tail keywords both get the fresh figures.
        long_tail.baiduIndex = baidu_index
        long_tail.baiduResult = baidu_result
        long_tail.save()