def answer_test(answer_url):
    """Exercise the read accessors and exporters of ``Answer`` for one URL.

    Builds an ``Answer`` from ``answer_url``, fetches its question, author,
    upvote count, question view count and voters, exports the answer to
    txt and markdown, then prints what was fetched.

    ``print`` is called with a single parenthesised argument so the
    function parses on Python 3 and prints the same text on Python 2.
    """
    answer = Answer(answer_url)

    # Question this answer responds to.
    question = answer.get_question()
    # Author of the answer.
    author = answer.get_author()
    # Number of upvotes the answer received.
    upvote = answer.get_upvote()
    # View count of the question this answer belongs to.
    visit_times = answer.get_visit_times()
    # All users who upvoted this answer.
    voters = answer.get_voters()

    # Export the answer as a txt file, then as a markdown file.
    answer.to_txt()
    answer.to_md()

    print(question)  # a Question object, e.g. <zhihu.Question instance at 0x7f0b25d13f80>
    print(question.get_title())  # the question title
    print(author)  # a User object, e.g. <zhihu.User instance at 0x7f0b25425b90>
    print(voters)  # a generator over all users who upvoted this answer
    print(author.get_user_id())  # the author's user id
    print(upvote)  # e.g. 9320
    print(visit_times)  # view count of the question this answer belongs to
def parse_article(self, response):
    """Log one answer page and save its images under ``pic/``.

    Increments ``self.parse_article_count``, logs the response URL, the
    answer's author and contents, then downloads every image URL returned
    by ``Answer.content()``.  Images answered with HTTP 200 are written to
    ``pic/<author><index>.png`` (index starts at 1); other responses are
    skipped.

    Args:
        response: the fetched page; it must expose ``.url`` and is handed
            unchanged to ``Answer``.
    """
    self.parse_article_count += 1
    Logging.info('======================%d' % self.parse_article_count)
    Logging.info(response.url)

    answer = Answer(response)
    author = answer.author()
    Logging.info('author: %s' % author)

    contents, img_urls = answer.content()
    Logging.info(contents)

    for index, url in enumerate(img_urls, 1):
        Logging.info(url)
        r = requests.get(url)
        # Create the target directory without spawning a shell.
        if not os.path.exists('pic'):
            os.makedirs('pic')
        file_name = 'pic/' + author + '%d' % index + '.png'
        if r.status_code == 200:
            # r.content is raw image bytes: write in binary mode so no
            # newline translation or text encoding is applied to it.
            with open(file_name, 'wb') as f:
                f.write(r.content)
def test_vote_up_with_url(self):
    """Upvoting an answer built from a URL returns the voting fields.

    Also checks that the answer id is taken from the URL's last segment.
    """
    # One-second pause before the request (presumably rate limiting -- confirm).
    time.sleep(1)
    target = Answer(url="https://www.zhihu.com/question/19761434/answer/14005147")
    payload = target.vote_up()
    self.assertEqual("14005147", target.id)
    for expected_key in ("voting", "voteup_count"):
        self.assertIn(expected_key, payload)
def main():
    """Record which tracked users upvoted each answer of the listed questions.

    Inputs (all read from the working directory):
      * ``userlist.txt`` -- one tracked user name per line.
      * ``questionlist-filtered.txt`` -- one question id per line; reading
        stops at the first empty line.
      * ``question/<qid>.txt`` -- one answer per line, space separated;
        field 0 is the author and field 2 the answer id.

    Output: ``ansupv/<qid>.txt``.  For every answer it holds a header line
    ``<aid> <author> <count>`` followed by ``count`` lines naming the
    tracked users that upvoted it.

    Processing starts at question ``37344024`` (a hard-coded resume
    point); ``list.index`` raises ``ValueError`` if that id is not listed.
    """
    viplist = []
    with open("userlist.txt", "r") as f1:
        for msg in f1:
            print(msg)
            # Strip only the line terminator; slicing off the last character
            # would eat a real character from an unterminated final line.
            viplist.append(msg.rstrip("\n"))

    qlist = []
    with open("questionlist-filtered.txt", "r") as f1:
        for raw in f1:
            msg = raw.strip("\n")
            if not msg:
                # Same stop condition as the readline loop: an empty line
                # ends the list.
                break
            print(msg)
            qlist.append(msg)

    # Set membership keeps the per-voter lookup constant time.
    vip_names = set(viplist)

    offset = qlist.index("37344024")
    # offset = qlist.index("30014729", )
    for qid in qlist[offset:]:
        alist = []
        autlist = []
        with open("question/" + qid + ".txt", "r") as qfile:
            print("question/" + qid + ".txt")
            for raw in qfile:
                line = raw.strip("\n")
                if not line:
                    break
                ainfo = line.split(" ")
                autlist.append(ainfo[0])
                alist.append(ainfo[2])

        urlpref = "http://www.zhihu.com/question/" + qid
        # The context manager guarantees the output is flushed and closed;
        # a bare ``vfile.close`` (no call) never closed the file.
        with open("ansupv/" + qid + ".txt", "w") as vfile:
            # Pair each answer with its author directly so that skipping an
            # answer cannot shift later answers onto the wrong author.
            for author, aid in zip(autlist, alist):
                if qid == "33554687" and aid == "56818808":
                    continue
                ans = Answer(urlpref + "/answer/" + aid)
                print(ans.answer_url)
                vcount = 0
                vlist = []
                # NOTE(review): the count is taken to cover tracked voters
                # only, matching the number of names written below -- confirm.
                for v in ans.get_voters():
                    if v in vip_names:
                        print(vcount)
                        print(v)
                        vlist.append(v)
                        vcount = vcount + 1
                vfile.write(aid + " " + str(author) + " " + str(vcount) + "\n")
                for vt in vlist:
                    vfile.write(vt + "\n")
        print("finish " + qid)
def dealWithPage(page_num):
    """Dump one question page and each of its answers to text files.

    Files written:
      * ``q_index.txt`` (append) -- the page number.
      * ``Zhihu/Question_<page_num>/q.txt`` -- title, detail, topics, view
        count, follower count and answer count, each terminated by a
        ``|||`` separator line.
      * ``a_index.txt`` (append) -- ``<page_num>|||<answer id>`` per answer.
      * ``Zhihu/Question_<page_num>/Answer/<answer id>.txt`` -- answer body
        (GBK encoded, unencodable characters dropped), author id and
        upvote count; or the single word ``None`` if fetching or writing
        that answer failed.

    If the question itself cannot be constructed, a message is printed and
    nothing is written.

    Args:
        page_num: question number, joined onto the module-level
            ``url_prefix`` to form the question URL.
    """
    page = urlparse.urljoin(url_prefix, str(page_num))
    try:
        question = Question(page)
    except Exception:
        # ``except Exception`` (not a bare except) lets Ctrl-C and
        # SystemExit stop the crawl.
        print('Question Get Error.')
        return

    with codecs.open('q_index.txt', 'a') as f:
        f.write(str(page_num) + '\r\n')

    question_folder = 'Zhihu/Question_' + str(page_num)
    if not os.path.exists(question_folder):
        os.makedirs(question_folder)

    with codecs.open(os.path.join(question_folder, 'q.txt'), 'w') as f:
        f.write(question.get_title() + '\r\n|||\r\n')
        f.write(question.get_detail() + '\r\n|||\r\n')
        for topic in question.get_topics():
            f.write(topic + '\t')
        f.write('\r\n|||\r\n')
        f.write(str(question.get_visit_times()) + '\r\n|||\r\n')
        f.write(str(question.get_followers_num()) + '\r\n|||\r\n')
        f.write(str(question.get_answers_num()) + '\r\n|||\r\n')

    answer_folder = os.path.join(question_folder, 'Answer')
    for answer in question.get_all_answers():
        ansURL = answer.answer_url
        ans = Answer(ansURL)
        answer_filename = str(ansURL.split('/')[-1])

        with codecs.open('a_index.txt', 'a') as f:
            f.write(str(page_num) + '|||' + str(answer_filename) + '\r\n')

        # Created lazily so a question without answers gets no empty folder.
        if not os.path.exists(answer_folder):
            os.makedirs(answer_folder)

        answer_path = os.path.join(answer_folder, answer_filename + '.txt')
        try:
            with codecs.open(answer_path, 'w') as f:
                f.write(ans.get_content().find('body').get_text().strip().encode(
                    "gbk", 'ignore') + '\r\n|||\r\n')
                f.write(ans.get_author().get_user_id() + '\r\n|||\r\n')
                f.write(str(ans.get_upvote()) + '\r\n|||\r\n')
        except Exception:
            print('TimeOut Occurred.')
            # Replace whatever was partially written with a marker.
            with codecs.open(answer_path, 'w') as f:
                f.write('None')
def test_nothelp_cancel_with_url(self):
    """Check the payload returned after cancelling on an answer given by URL."""
    # NOTE(review): the test name and the expected {"is_nothelp": False}
    # payload suggest a "not helpful" cancel, but the call made here is
    # thank_cancel(); the first assertion also requires "is_thanked", a key
    # the exact dict in the second assertion does not have.  Confirm which
    # endpoint this test is meant to cover.
    time.sleep(1)
    answer_url = "https://www.zhihu.com/question/19761434/answer/14005147"
    data = Answer(url=answer_url).thank_cancel()
    self.assertIn("is_thanked", data)
    self.assertEqual({"is_nothelp": False}, data)
def test_vote_neutral_with_url(self):
    """A neutral vote on an answer given by URL returns the voting fields."""
    # One-second pause before the request (presumably rate limiting -- confirm).
    time.sleep(1)
    target = Answer(url="https://www.zhihu.com/question/19761434/answer/14005147")
    payload = target.vote_neutral()
    for expected_key in ("voting", "voteup_count"):
        self.assertIn(expected_key, payload)
def test_thank_with_url(self):
    """Thanking an answer given by URL reports ``is_thanked`` and ``true``."""
    # One-second pause before the request (presumably rate limiting -- confirm).
    time.sleep(1)
    target = Answer(url="https://www.zhihu.com/question/19761434/answer/14005147")
    payload = target.thank()
    for expected in ("is_thanked", "true"):
        self.assertIn(expected, payload)
def setUpClass(cls):
    """Build an offline ``Answer`` fixture plus the values the tests expect.

    The answer page is parsed from ``answer.html`` under ``TEST_DATA_PATH``
    and injected into the ``Answer`` instance, whose session is cleared.
    The reference markdown export is loaded from ``answer.md`` into
    ``cls.answer_saved``.
    """
    url = 'http://www.zhihu.com/question/24825703/answer/30975949'

    with open(os.path.join(TEST_DATA_PATH, 'answer.html'), 'rb') as html_file:
        soup = BeautifulSoup(html_file.read())

    with open(os.path.join(TEST_DATA_PATH, 'answer.md'), 'rb') as md_file:
        cls.answer_saved = md_file.read()

    fixture = Answer(url)
    fixture._session = None
    fixture.soup = soup
    cls.answer = fixture

    cls.expected = {
        'id': 30975949,
        'aid': 7775236,
        'xsrf': 'cfd489623d34ca03adfdc125368c6426',
        'html': soup.prettify(),
        'author_id': 'tian-ge-xia',
        'author_name': '甜阁下',
        'question_id': 24825703,
        'question_title': '关系亲密的人之间要说「谢谢」吗?',
        'upvote_num': 1164,
        'upvoter_name': 'Mikuroneko',
        'upvoter_id': 'guo-yi-hui-23',
    }
def answer_test(answer_url):
    """Exercise the basic accessors and exporters of ``Answer`` for one URL.

    Builds an ``Answer`` from ``answer_url``, fetches its question, author
    and upvote count, exports the answer to txt and markdown, then prints
    what was fetched.

    ``print`` is called with a single parenthesised argument so the
    function parses on Python 3 and prints the same text on Python 2.
    """
    answer = Answer(answer_url)

    # Question this answer responds to.
    question = answer.get_question()
    # Author of the answer.
    author = answer.get_author()
    # Number of upvotes the answer received.
    upvote = answer.get_upvote()

    # Export the answer as a txt file, then as a markdown file.
    answer.to_txt()
    answer.to_md()

    print(question)  # a Question object, e.g. <zhihu.Question instance at 0x7f0b25d13f80>
    print(question.get_title())  # the question title
    print(author)  # a User object, e.g. <zhihu.User instance at 0x7f0b25425b90>
    print(author.get_user_id())  # the author's user id
    print(upvote)  # e.g. 9320
def test_images(self):
    """Smoke test: ``images()`` on a known answer URL runs without raising."""
    target = Answer(url="https://www.zhihu.com/question/58481349/answer/184247410")
    target.images()
def test_nothelp_with_url(self):
    """Check the payload returned for an answer given by URL."""
    # NOTE(review): the test name and the expected {"is_nothelp": True}
    # payload suggest a "not helpful" action, but the call made here is
    # thank().  Confirm which endpoint this test is meant to cover.
    time.sleep(1)
    answer_url = "https://www.zhihu.com/question/19761434/answer/14005147"
    data = Answer(url=answer_url).thank()
    self.assertEqual({"is_nothelp": True}, data)
def test_vote_down_with_id(self):
    """Downvoting an answer looked up by id returns the voting fields."""
    # One-second pause before the request (presumably rate limiting -- confirm).
    time.sleep(1)
    payload = Answer(id=14005147).vote_down()
    for expected_key in ("voting", "voteup_count"):
        self.assertIn(expected_key, payload)
def vote_up_with_id():
    """Upvote answer 14005147, looked up by id, and print the returned data."""
    print(Answer(id=14005147).vote_up())
import requests.utils
import pickle
from http.cookies import SimpleCookie

from zhihu import Zhihu
from zhihu import Answer

# Manual smoke script for the zhihu client.  The commented-out calls are
# earlier experiments, each marked with the outcome that was noted for it.

zhihu = Zhihu()
# print(zhihu.cookies)

# View a user's profile -- worked
# profile = zhihu.profile(user_slug="xiaoxiaodouzi")
# print(profile)

# Send a private message -- worked
# response = zhihu.send_message(content='TESTMESSAGE', user_slug="xiaoxiaodouzi")
# print(response)

# Follow a user -- worked
# response = zhihu.follow(user_slug='SemitLee')
# print(response)

answer = Answer(url="https://www.zhihu.com/question/34401174/answer/389502954")

# Fetch the answer's images, passing "images" as the path argument.
r = answer.images(path="images")
# print(r)

# Upvote the answer -- worked
# NOTE(review): the note above says "upvote", but the live calls below set
# the vote to neutral and cancel the thanks -- confirm which is intended.
response = answer.vote_neutral()
answer.thank_cancel()
# print(response)
def test_vote_up_with_id(self):
    """Upvoting an answer looked up by id returns the voting fields."""
    payload = Answer(id=14005147).vote_up()
    for expected_key in ("voting", "voteup_count"):
        self.assertIn(expected_key, payload)
def answer_test(answer_url):
    """Exercise the accessors and exporters of ``Answer``, including length.

    Builds an ``Answer`` from ``answer_url``, fetches its question, author,
    upvote count, question view count, voters and answer length, exports
    the answer to txt and markdown, then prints what was fetched (one line
    per voter).

    ``print`` is called with a single parenthesised argument so the
    function parses on Python 3 and prints the same text on Python 2.
    """
    answer = Answer(answer_url)

    # Question this answer responds to.
    question = answer.get_question()
    # Author of the answer.
    author = answer.get_author()
    # Number of upvotes the answer received.
    upvote = answer.get_upvote()
    # View count of the question this answer belongs to.
    visit_times = answer.get_visit_times()
    # All users who upvoted this answer.
    voters = answer.get_voters()
    # Length of the answer.
    answer_length = answer.get_answer_length()

    # Export the answer as a txt file, then as a markdown file.
    answer.to_txt()
    answer.to_md()

    print(question)  # a Question object, e.g. <zhihu.Question instance at 0x7f0b25d13f80>
    print(question.get_title())  # the question title
    print(author)  # a User object, e.g. <zhihu.User instance at 0x7f0b25425b90>
    for voter in voters:
        print(voter)  # a User object
    print(author.get_user_id())  # the author's user id
    print(upvote)  # e.g. 9320
    print(visit_times)  # view count of the question this answer belongs to
    # Format label and value into one string: a two-item print statement
    # would otherwise turn into a tuple print on Python 2.
    print('answer_length %s' % (answer_length,))
# -*- coding: utf-8 -*-
"""Walk through the ``Answer`` API for one example answer.

``print`` is called with a single parenthesised argument so the script
parses on Python 3 and prints the same text on Python 2.
"""
from zhihu import Answer

answer_url = "http://www.zhihu.com/question/24269892/answer/29960616"
answer = Answer(answer_url)

# Question this answer responds to.
question = answer.get_question()
# Author of the answer.
author = answer.get_author()
# Number of upvotes the answer received.
upvote = answer.get_upvote()
# View count of the question this answer belongs to.
visit_times = answer.get_visit_times()
# All users who upvoted this answer.
voters = answer.get_voters()

# Export the answer as a txt file, then as a markdown file.
answer.to_txt()
answer.to_md()

print(question)  # a Question object, e.g. <zhihu.Question instance at 0x7f0b25d13f80>
print(question.get_title())  # the question title
print(author)  # a User object, e.g. <zhihu.User instance at 0x7f0b25425b90>
print(voters)  # a generator over all users who upvoted this answer
print(author.get_user_id())  # the author's user id
# -*- coding: utf-8 -*-
"""Print the body text of one example answer, then wait for Enter."""
from zhihu import Question
from zhihu import Answer
from zhihu import User
from zhihu import Collection
import requests

url = "https://www.zhihu.com/question/19550321/answer/14240492"
ans = Answer(url)
print(ans.get_content().find('body').get_text())

# Wait for Enter before exiting.  On Python 2 ``input()`` evaluates what is
# typed (and raises on a bare Enter), so use ``raw_input`` there; Python 3
# has no ``raw_input`` and its ``input`` already returns the raw line.
try:
    wait_for_enter = raw_input
except NameError:
    wait_for_enter = input
wait_for_enter()
"""Call ``images()`` on one example answer and print what it returns."""
from zhihu import Answer

answer = Answer(
    url="https://www.zhihu.com/question/30913458/answer/193839736"
)
images = answer.images()
print(images)