示例#1
0
文件: seo.py 项目: e3gs/flask-boot
def fetch_keywords():
    """
    Fetch the site-level keywords and insert the new ones into the database.

    Requests ``ym + '/girls/all/'`` through the shared session ``ss``, decodes
    the response as GBK and reads every anchor under ``div.listap``. An anchor
    whose stripped title has no ``Keyword`` document with that name is saved
    as a new ``KeywordLevel.SITE`` keyword; its ``refer`` is ``ym`` followed by
    the anchor's ``href``.

    Returns:
        A list with the name of every keyword found on the page, whether it
        already existed or was just created.
    """
    ss.headers['Referer'] = ym
    response = ss.get(ym + '/girls/all/')
    response.encoding = 'gbk'
    anchors = html.fromstring(response.text).xpath('//div[@class="listap"]/a')
    print('Found %s keywords' % len(anchors))

    names = []
    for anchor in anchors:
        raw_title = anchor.get('title')
        # The log line reports the raw title; lookup and storage use the stripped one.
        log_line = 'Processing keyword %s' % raw_title
        title = raw_title.strip()
        existing = Keyword.find_one({'name': title})
        name = unicode(title)
        if not existing:
            created = Keyword()
            created.name = name
            created.level = KeywordLevel.SITE
            created.refer = u'%s%s' % (ym, anchor.get('href'))
            created.save()
        else:
            log_line += ', skipped as existing'
        print(log_line)
        names.append(name)

    return names
示例#2
0
def hearsay(keyword_id):
    """
    Edit the article attached to a keyword.

    A GET request renders the edit page. Any other method reads ``title`` and
    ``body`` from the submitted form, rejects an empty value with a JSON
    failure message, and otherwise stores both on ``keyword.hearsay`` and
    saves the keyword.

    Responds with 404 (via ``abort``) when no keyword has the given ``_id``.
    """
    keyword = Keyword.find_one({'_id': keyword_id})
    if not keyword:
        abort(404)

    # Open page
    if request.method == 'GET':
        return render_template('seo/hearsay.html', keyword=keyword)

    # Handle post request
    current_app.logger.info('Try to save hearsay for keyword %s/%s' % (keyword._id, keyword.name))
    form = request.form
    title = form.get('title', '')
    body = form.get('body', '')
    if not title:
        return jsonify(success=False, message='文章标题不能为空!')
    if not body:
        return jsonify(success=False, message='文章内容不能为空!')

    # Must be evaluated before the fields below are assigned: it records
    # whether the keyword had a (truthy) hearsay prior to this save.
    first_save = not keyword.hearsay
    keyword.hearsay.title = title
    keyword.hearsay.body = body
    keyword.updateTime = datetime.now()
    if first_save:
        keyword.status = KeywordStatus.PROCESSED

    keyword.save()

    # notify_baidu is only called for a first save and only outside debug mode.
    if first_save and not current_app.debug:
        notify_baidu(current_app._get_current_object(), keyword._id)

    return jsonify(success=True, message='成功保存了你的文章。')
示例#3
0
def refresh(keyword_id):
    """
    Refresh the long-tail keywords of the specified keyword.

    Looks the keyword up by ``_id`` (404 via ``abort`` when missing), passes
    it to ``analyze_keyword`` together with the concrete application object
    behind ``current_app``, and answers with a JSON success message.
    """
    target = Keyword.find_one({'_id': keyword_id})
    if not target:
        abort(404)

    # Hand over the real app object rather than the context-local proxy.
    app = current_app._get_current_object()
    analyze_keyword(app, target)
    return jsonify(success=True, message='成功触发了刷新请求,请稍候查看最新数据。')
示例#4
0
文件: seo.py 项目: e3gs/flask-boot
def analyze_keyword(k):
    """
    Analyze a site-level keyword: fetch its Baidu index and its related
    long-tail keywords.

    Does nothing (apart from printing a message) when no keyword named ``k``
    exists, or when that keyword already has a positive ``baiduIndex`` or
    ``baiduResult``. Otherwise the 5118 word page for ``k`` is fetched and
    each listed row is processed: the row whose name equals ``k`` updates the
    keyword itself, every other row is stored as a new
    ``KeywordLevel.LONG_TAIL`` keyword unless one with that name already
    exists. Finishes with a random 5-15 second sleep.
    """
    keyword = Keyword.find_one({'name': k})
    if not keyword:
        print('Keyword %s does not exist' % k)
        return
    if keyword.baiduIndex > 0 or keyword.baiduResult > 0:
        print('Keyword %s is imported before' % k)
        return
    print('Try to analyze keyword %s/%s' % (keyword._id, k))

    ss.headers['Referer'] = 'http://www.5118.com/'
    page = ss.get('http://www.5118.com/seo/words/%s' % url_quote(k)).text
    rows = html.fromstring(page).xpath('//div[@class="Fn-ui-list dig-list"]/dl')
    total = len(rows)
    for row in rows:
        # Rows carrying exactly the class "dl-word" are skipped.
        if row.get('class', '') == 'dl-word':
            continue
        name = unicode(row.xpath('./dd[1]//a[1]/@title')[0].strip())
        index_text = row.xpath('./dd[2]/text()')[0].strip()
        result_text = row.xpath('./dd[3]/text()')[0].strip()
        # Cells that are not purely digits count as 0.
        baidu_index = index_text if index_text.isdigit() else 0
        baidu_result = result_text if result_text.isdigit() else 0
        print('Found keyword: %s/%s/%s' % (name, baidu_index, baidu_result))

        if name != k:
            if Keyword.count({'name': name}) > 0:
                print('This keyword already exists')
                continue
            long_tail = Keyword()
            long_tail.name = name
            long_tail.level = KeywordLevel.LONG_TAIL
            long_tail.parentId = keyword._id
            long_tail.baiduIndex = int(baidu_index)
            long_tail.baiduResult = int(baidu_result)
            long_tail.save()
            continue

        keyword.baiduIndex = int(baidu_index)
        keyword.baiduResult = int(baidu_result)
        # NOTE(review): the "- 2" presumably discounts the header row and the
        # keyword's own row - confirm against the page layout.
        if total > 2:
            keyword.total = total - 2
        keyword.save()

    time.sleep(random.randint(5, 15))
示例#5
0
def analyze_keyword(app, keyword):
    """
    Analyze a site-level keyword: fetch its Baidu index and its related
    long-tail keywords. The data is currently scraped from 5118.

    Args:
        app: application object whose ``logger`` is used for progress messages.
        keyword: the ``Keyword`` to analyze; its ``name`` is the search term
            and its ``_id`` becomes ``parentId`` of newly created long-tail
            keywords.

    The row whose name equals ``keyword.name`` updates ``keyword`` itself.
    Every other row updates the ``Keyword`` with that name, creating it as a
    ``KeywordLevel.LONG_TAIL`` keyword first when it does not exist yet.
    """
    log = app.logger
    log.info('Try to analyze keyword %s/%s' % (keyword._id, keyword.name))

    ss.headers['Referer'] = 'http://www.5118.com/'
    page = ss.get('http://www.5118.com/seo/words/%s' % url_quote(keyword.name)).text
    rows = html.fromstring(page).xpath('//div[@class="Fn-ui-list dig-list"]/dl')
    total = len(rows)
    for row in rows:
        # Rows carrying exactly the class "dl-word" are skipped.
        if row.get('class', '') == 'dl-word':
            continue
        name = unicode(row.xpath('./dd[1]//a[1]/@title')[0].strip())
        index_text = row.xpath('./dd[2]/text()')[0].strip()
        result_text = row.xpath('./dd[3]/text()')[0].strip()
        # Cells that are not purely digits count as 0.
        baidu_index = index_text if index_text.isdigit() else 0
        baidu_result = result_text if result_text.isdigit() else 0
        log.info('Found keyword: %s/%s/%s' % (name, baidu_index, baidu_result))

        if name != keyword.name:
            long_tail = Keyword.find_one({'name': name})
            if not long_tail:
                long_tail = Keyword()
                long_tail.name = name
                long_tail.level = KeywordLevel.LONG_TAIL
                long_tail.parentId = keyword._id

            # Existing long-tail keywords get their numbers refreshed as well.
            long_tail.baiduIndex = int(baidu_index)
            long_tail.baiduResult = int(baidu_result)
            long_tail.save()
            continue

        keyword.baiduIndex = int(baidu_index)
        keyword.baiduResult = int(baidu_result)
        # NOTE(review): the "- 2" presumably discounts the header row and the
        # keyword's own row - confirm against the page layout.
        if total > 2:
            keyword.total = total - 2
        keyword.save()
示例#6
0
def longtail(keyword_id):
    """
    List the long-tail keywords under the specified site keyword.

    Query parameters:
        status: comma-separated status names to include; defaults to
            ``bare,processed,repeated``. Blank entries are ignored, and when
            nothing remains no status filter is applied.
        page: 1-based page number; a non-numeric value or a value below 1
            falls back to the first page.

    Responds with 404 (via ``abort``) when no keyword has the given ``_id``;
    otherwise renders ``seo/longtail.html`` with the keyword, one page of its
    long-tail keywords sorted by ``baiduIndex`` descending, and a
    ``Pagination`` object.
    """
    keyword = Keyword.find_one({'_id': keyword_id})
    if not keyword:
        abort(404)

    raw_status = request.args.get('status', u'bare,processed,repeated')
    # The page number comes straight from the query string: treat garbage as
    # page 1 instead of failing with a 500, and never let it drop below 1 so
    # the computed skip offset cannot become negative.
    try:
        page = int(request.args.get('page', '1'))
    except ValueError:
        page = 1
    page = max(page, 1)
    start = (page - 1) * PAGE_COUNT

    condition = {'level': KeywordLevel.LONG_TAIL, 'parentId': keyword_id}
    # str.split never returns an empty list (u''.split(u',') == [u'']), so
    # blank entries have to be dropped explicitly for the emptiness check
    # below to be meaningful.
    statuses = [part.strip() for part in raw_status.split(u',') if part.strip()]
    if statuses:
        condition['status'] = {'$in': statuses}

    count = Keyword.count(condition)
    cursor = Keyword.find(condition, skip=start, limit=PAGE_COUNT, sort=[('baiduIndex', pymongo.DESCENDING)])
    keywords = list(cursor)
    pagination = Pagination(page, PAGE_COUNT, count)

    return render_template('seo/longtail.html', keyword=keyword, keywords=keywords, pagination=pagination)