Exemplo n.º 1
0
Arquivo: app.py Projeto: whr0623/DCVS
def jd_crawler():
    """
    Check the captcha, make sure the requested product is being crawled and
    block until the crawl is reported as finished.

    Reads ``pid`` and ``captcha`` from the request values. The captcha is
    lower-cased and compared with the ``captcha`` value held in the session.

    :return: a JSON string; ``{"result": "wrong_code"}`` when the captcha does
        not match, otherwise ``{"result": "ok", "pid": <pid>}``
    """
    pid = request.values.get('pid', 0)
    # The submitted captcha is compared case-insensitively.
    submitted_code = str(request.values.get('captcha', 0)).lower()

    # Reject the request straight away when the captcha does not match.
    if submitted_code != session.get('captcha'):
        return json.dumps({'result': 'wrong_code'})

    # Look the product up in the database first.
    product = get_product_by_pid(pid)
    if product is None:
        # Unknown product: wrap the pid in the URL prefix/suffix and push the
        # resulting URL onto the task queue.
        # NOTE(review): the concatenation presumably expects pid to be a str;
        # the fallback value 0 used above is an int -- confirm.
        redis_client.lpush('jd:items_urls', PREFIX + pid + POSTFIX)
        crawl_pending = True
    else:
        # Fewer than WORK_LOAD comments stored means a crawl for this product
        # is still in progress.
        crawl_pending = len(product.comments) < WORK_LOAD

    if crawl_pending:
        # Poll every two seconds until the counter stored under pid is no
        # longer positive.
        # NOTE(review): if the key is missing and get() yields None, int()
        # raises -- confirm the crawler always sets it before this runs.
        while int(redis_client.get(pid)) > 0:
            time.sleep(2)

    return json.dumps({'result': 'ok', 'pid': pid})
Exemplo n.º 2
0
def jd_crawler():
    """
    Make sure the requested product is queued for crawling and block until
    the crawl is reported as finished.

    Reads ``pid`` from the request values. When the product is not in the
    database its URL is pushed onto the ``jd:items_urls`` queue; in every
    case the function then waits on the counter stored under ``pid``.

    :return: the JSON string ``{"result": "ok", "pid": <pid>}``
    """
    pid = request.values.get('pid', 0)

    # Only products that are not in the database yet need to be enqueued:
    # the pid is wrapped in the URL prefix/suffix and pushed onto the queue.
    if get_product_by_pid(pid) is None:
        redis_client.lpush('jd:items_urls', PREFIX + pid + POSTFIX)

    # Whether the task was just queued or was already running, poll every
    # two seconds until the counter stored under pid is no longer positive.
    while int(redis_client.get(pid)) > 0:
        time.sleep(2)

    return json.dumps({'result': 'ok', 'pid': pid})
Exemplo n.º 3
0
def jd_charts(chart, pid):
    """
    Render the dashboard page with the requested chart for one product.

    :param chart: chart name; ``'bar'``, ``'pie'`` and ``'wordcloud'`` are
        recognised
    :param pid: product id passed to ``get_product_by_pid``
    :return: the rendered ``dashboard.html`` template, ``'404!!!'`` when the
        product is not found, or ``'404!'`` when the chart name is not
        recognised
    """
    product = get_product_by_pid(pid)
    if product is None:
        return '404!!!'

    # The page object is created before the chart name is validated.
    jd_page = JDPage(product)

    # chart name -> (JDPage method that builds it, chart_type for the template)
    builders = {
        'bar': ('generate_bar_charts', 1),
        'pie': ('generate_pie_charts', 2),
        'wordcloud': ('generate_word_cloud_charts', 3),
    }
    selected = builders.get(chart)
    if selected is None:
        return '404!'

    builder_name, chart_type = selected
    getattr(jd_page, builder_name)()

    embedded_chart = Markup(jd_page.page.render_embed())
    return render_template("dashboard.html",
                           pid=pid,
                           chart_type=chart_type,
                           myechart=embedded_chart)
Exemplo n.º 4
0
Arquivo: app.py Projeto: whr0623/DCVS
def jd_charts(chart, pid):
    """
    Render the dashboard page with the requested chart for one product.

    :param chart: chart name; ``'bar'``, ``'pie'`` and ``'wordcloud'`` are
        recognised
    :param pid: product id passed to ``get_product_by_pid``
    :return: the rendered ``dashboard.html`` template, ``'404!!!'`` when the
        product is not found, or ``'404!'`` when the chart name is not
        recognised
    """
    product = get_product_by_pid(pid)
    if product is None:
        return '404!!!'

    # The page object is created before the chart name is validated.
    jd_page = JDPage(product)

    # (chart name, JDPage method that builds it, chart_type for the template)
    known_charts = (
        ('bar', 'generate_stacked_bar_charts', 1),
        ('pie', 'generate_pie_charts', 2),
        ('wordcloud', 'generate_word_cloud_charts', 3),
    )
    for chart_name, builder_name, chart_type in known_charts:
        if chart == chart_name:
            getattr(jd_page, builder_name)()
            break
    else:
        # No entry matched the requested chart name.
        return '404!'

    # Template arguments are gathered in the order the template call lists them.
    embedded_chart = jd_page.page.render_embed()
    remote_host = REMOTE_HOST
    js_dependencies = jd_page.page.get_js_dependencies()
    return render_template("dashboard.html",
                           pid=pid,
                           chart_type=chart_type,
                           myechart=embedded_chart,
                           host=remote_host,
                           script_list=js_dependencies)
Exemplo n.º 5
0
from util.db_util import get_product_by_pid


def get_summary_and_weight(comments):
    """
    Collect the summaries of the given comments together with their weights.

    Each comment is summarised with ``SnowNLP(...).summary()``. Every item of
    that summary is credited with the comment's vote-based weight, and the
    weights of items that occur more than once are added together.

    :param comments: comments to analyse; each one must expose ``content``
        and ``votes``
    :return: ``(summaries, weights)`` -- two lists in matching order, where
        ``weights[i]`` is the accumulated weight of ``summaries[i]``
    """
    totals = {}
    for comment in comments:
        analysed = SnowNLP(comment.content)
        # More votes give a comment more weight; the logarithm smooths out
        # the gap between rarely and heavily voted comments.
        weight = int(math.log(comment.votes + 1) + 1) ** 2
        for summary_item in analysed.summary():
            totals[summary_item] = totals.get(summary_item, 0) + weight
    return list(totals), list(totals.values())


if __name__ == '__main__':
    # Script entry point: fetch one hard-coded product by its id.
    # NOTE(review): the result is not used in the visible excerpt --
    # presumably it feeds get_summary_and_weight further down; confirm.
    product = get_product_by_pid(100000822981)
Exemplo n.º 6
0
Arquivo: app.py Projeto: whr0623/DCVS
def jd_dashboard(pid):
    """
    Render the dashboard page for one product.

    :param pid: product id passed to ``get_product_by_pid``
    :return: the rendered ``dashboard.html`` template with the product bound
        to ``product``, or the string ``'404!!!'`` when no product is found
    """
    found = get_product_by_pid(pid)
    if found is not None:
        return render_template('dashboard.html', product=found)
    return '404!!!'