Exemplo n.º 1
0
def fetch_proc(username):
    """Fetch one user's people pages and store the avatar and answers.

    Steps, in order:

    1. Mark the user as ``dbhelper.FETCH_ING``.
    2. Download the first people page; if that fails, stop (the user is
       left in the ``FETCH_ING`` state, as before).
    3. Store the avatar source and save the answers linked from page 1.
    4. Walk the remaining pages, saving the answers linked from each one;
       pages that cannot be fetched are skipped.
    5. Mark the user as ``dbhelper.FETCH_OK``.

    Every ``dbhelper`` update is performed while holding ``dblock``.

    The connection is always closed and ``s`` is always released, including
    on the early-return path and when an exception propagates. Previously
    the early return and any exception skipped ``s.release()``.

    Args:
        username: Identifier of the user, passed through to the ``zhihu``
            and ``dbhelper`` helpers.
    """
    try:
        with dblock:
            dbhelper.update_user_by_name(
                username, {'fetch': dbhelper.FETCH_ING})

        conn = zhihu.get_conn()
        try:
            content = zhihu.fetch_people_page(conn, username)
            if content is None:
                # Nothing to parse; cleanup happens in the finally clauses.
                return

            src = zhihu.get_avatar_src(content)
            with dblock:
                dbhelper.update_user_by_name(username, {'avatar': src})

            link_list = zhihu.get_answer_link_list(content)
            zhihu.saveAnswer(conn, username, link_list, dblock)

            num = zhihu.get_page_num(content)
            if num > 1:
                # NOTE(review): range(2, num) stops at page num - 1. If
                # get_page_num returns the total number of pages, the last
                # page is never fetched -- confirm and use num + 1 if so.
                for i in range(2, num):
                    content = zhihu.fetch_people_page(conn, username, i)
                    if content is None:
                        continue
                    link_list = zhihu.get_answer_link_list(content)
                    zhihu.saveAnswer(conn, username, link_list, dblock)

            with dblock:
                dbhelper.update_user_by_name(
                    username, {'fetch': dbhelper.FETCH_OK})
        finally:
            conn.close()

        zhihu.slog('### after saveAnswer ###')
    finally:
        # presumably a semaphore acquired by the caller before starting this
        # worker -- it must be handed back on every exit path.
        s.release()
Exemplo n.º 2
0
#coding: utf-8

import http.client
import zhihu
import dbhelper

# Single connection object reused for every question-page request issued
# in the loop below.
conn = zhihu.get_conn()

# print("there are ",count(ids)," questions to fetch\n")

# Counters initialised to zero; they are not updated in the visible part of
# the script -- presumably questions processed and users inserted
# (TODO confirm against the rest of the file).
count = 0
insert_count = 0
while True:
    qid = dbhelper.next_question_id()
    if qid is None:
        break
    dbhelper.set_question_fetch(qid, dbhelper.FETCH_ING)
    url = "/question/{}".format(qid)
    conn.request("GET", url)
    response = conn.getresponse()
    if response is None:
        print('can not open', url)
    code = response.status
    print("{} [{}]".format(url, code))
    content = response.read()
    username_list = zhihu.get_username_list(content)

    for username, nickname in username_list.items():
        print("\t{:28s}{:8s}".format(username, nickname), end='')
        rs = dbhelper.saveUser(username, nickname)
        if rs is not None: