Exemplo n.º 1
0
#-*- coding:utf-8 -*-

from zhihu_api import User,Answer,Question,page_exist

import db,sys

reload(sys)
sys.setdefaultencoding('utf8')

# Script body: walk the stored users and insert each of their answers that is
# not already recorded.
# presumably get_existing() returns the URLs already stored for the given
# kind -- TODO confirm against the db module.
users = db.get_existing("users")
pool_answers = db.get_existing("answers")
# Progress counter, printed next to each user.
i = 1
# Hard-coded skip threshold: users are skipped while the (already incremented)
# counter is below this value.
# NOTE(review): looks like a manual resume point from an earlier run -- confirm.
saved_num = 1277

for user in users:
    print i,user
    # The counter is bumped BEFORE the comparison below, so the first user that
    # is actually processed is the one printed with index saved_num - 1.
    i += 1
    if i < saved_num :
        continue
    else:
        # Only look at users that report at least one answer.
        if User(user).answers_num()!=0:
            # NOTE(review): j is not used anywhere in the visible part of this script.
            j = 0
            answers = User(user).get_answers()
            for answer in answers:
                # Skip answers whose URL is already in the stored set.
                if answer.answer_url not in pool_answers:
                    # Skip URLs for which page_exist() reports no page.
                    if page_exist(answer.answer_url):
                        url = answer.answer_url
                        tmp = Answer(url)
                        # Answers without content are not inserted.
                        if tmp.get_content()!= None:
                            # NOTE(review): the handler for this try is not visible in
                            # this excerpt; the script is cut off after the insert call.
                            try:
                                db.insert_answer(tmp.answer_url,tmp.get_author(),tmp.get_content(),tmp.get_upvote(),tmp.get_question())
Exemplo n.º 2
0
# -*- coding:utf-8 -*-

from zhihu_api import User, Answer, Question

import Queue, db, sys

reload(sys)
sys.setdefaultencoding("utf8")


# Script body: for every stored user, insert each question they asked that is
# not already recorded.
users = db.get_existing("users")
# presumably the URLs of questions already stored -- TODO confirm against db.
pool = db.get_existing("questions")

for user in users:
    # The original also called User(user).get_answers() here and discarded the
    # result; that unused lookup has been dropped.
    questions = User(user).get_asks()
    # get_asks() can yield None (the original guarded against it); nothing to do.
    if questions is None:
        continue
    for question in questions:
        url = question.url
        if url in pool:
            continue
        tmp = Question(url)
        db.insert_question(
            tmp.get_title(), url, tmp.get_followers_num(), tmp.get_answers_num(), tmp.get_detail()
        )
Exemplo n.º 3
0
#-*- coding:utf-8 -*-

from zhihu_api import User
import Queue,db,sys


reload(sys)
sys.setdefaultencoding('utf8')

# Script body: breadth-first crawl of the followee graph starting from one
# seed profile, inserting every newly discovered user.
initial_page = u'http://www.zhihu.com/people/liu-yang-57-86'
url_queue = Queue.Queue()
# URLs that must not be visited again; starts from whatever db already holds.
seen = db.get_existing("users")
seen.add(initial_page)
url_queue.put(initial_page)

# Keep going until the frontier is exhausted.
while not url_queue.empty():
    # Take the URL at the front of the queue.
    current_url = url_queue.get()
    # Walk every profile URL this user links to as a followee.
    for next_url in User(current_url).get_followees():
        if next_url in seen:
            continue
        seen.add(next_url)
        try:
            (name, url, followees_num, followers_num,
             thanks_num, agree_num, asks_num, answers_num) = User(next_url).get_info()
            db.insert_user(name, url, followees_num, followers_num,
                           thanks_num, agree_num, asks_num, answers_num)
        except Exception:
            # Best-effort: a failed lookup or insert is reported and skipped.
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt / SystemExit stop the crawl.
            print(next_url + '用户不存在')
        # The URL is queued even when the lookup above failed, as before.
        url_queue.put(next_url)