Exemplo n.º 1
0
def answer_test(answer_url):
    """Exercise the read accessors and exporters of ``Answer`` for one URL.

    Fetches the answered question, the author, the upvote count, the visit
    count of the owning question and the voters generator, exports the
    answer to txt and markdown, then prints everything that was fetched.

    :param answer_url: URL of the answer to inspect.
    """
    target = Answer(answer_url)

    parent_question = target.get_question()    # question this answer replies to
    writer = target.get_author()               # author of the answer
    upvote_total = target.get_upvote()         # number of upvotes received
    question_views = target.get_visit_times()  # views of the owning question
    voter_stream = target.get_voters()         # users who upvoted the answer

    # Export the answer as a txt file, then as a markdown file.
    target.to_txt()
    target.to_md()

    # Single-argument print(...) produces the same output as the Python 2
    # print statement.
    print(parent_question)              # a Question object
    print(parent_question.get_title())  # the question title
    print(writer)                       # a User object
    print(voter_stream)                 # a generator over the upvoting users
    print(writer.get_user_id())         # the author's user id
    print(upvote_total)
    print(question_views)
Exemplo n.º 2
0
    def parse_article(self, response):
        """Log one answer page and save its images under ``pic/``.

        Increments ``self.parse_article_count``, logs the response URL, the
        answer's author and its contents, then downloads every image URL
        returned by ``answer.content()``. Images are stored as
        ``pic/<author><index>.png`` with a 1-based index; downloads whose
        HTTP status is not 200 are skipped.

        :param response: the response object wrapped by ``Answer``.
        """
        self.parse_article_count += 1
        Logging.info('======================%d' % self.parse_article_count)
        Logging.info(response.url)

        answer = Answer(response)

        # Looked up once and reused for the log line and every file name;
        # assumes author() is stable for a single response.
        author = answer.author()
        Logging.info('author: %s' % author)
        (contents, img_urls) = answer.content()
        Logging.info(contents)

        for index, url in enumerate(img_urls, 1):
            Logging.info(url)

            r = requests.get(url)
            # Create the target directory portably instead of shelling out
            # to `mkdir`.
            if not os.path.isdir('pic'):
                os.makedirs('pic')
            if r.status_code != 200:
                continue
            file_name = 'pic/' + author + '%d' % index + '.png'
            # r.content is raw bytes: binary mode avoids newline translation
            # on Windows and a TypeError on Python 3.
            with open(file_name, 'wb') as f:
                f.write(r.content)
Exemplo n.º 3
0
 def test_vote_up_with_url(self):
     """Call ``vote_up()`` on a URL-addressed answer and check the result."""
     time.sleep(1)  # one-second pause before issuing the request
     target_url = "https://www.zhihu.com/question/19761434/answer/14005147"
     voted_answer = Answer(url=target_url)
     response = voted_answer.vote_up()
     # The id is expected to match the last path segment of the URL.
     self.assertEqual("14005147", voted_answer.id)
     for expected_key in ("voting", "voteup_count"):
         self.assertIn(expected_key, response)
Exemplo n.º 4
0
def main():
    """Record, per answer, which users from ``userlist.txt`` upvoted it.

    Inputs:
      * ``userlist.txt`` - one user per line.
      * ``questionlist-filtered.txt`` - one question id per line; reading
        stops at the first empty line.
      * ``question/<qid>.txt`` - space-separated lines; field 0 is recorded
        as the author and field 2 is used as the answer id. Reading stops at
        the first empty line.

    Output: ``ansupv/<qid>.txt``, containing for every answer one line
    ``"<aid> <author> <count>"`` followed by one line per matching voter.

    Processing resumes from the hard-coded question id ``"37344024"``.

    :raises ValueError: if the resume question id is not in the list.
    """
    viplist = []
    with open("userlist.txt", "r") as f1:
        for msg in f1:
            print(msg)
            # Strip only the newline: slicing off the last character would
            # truncate a final line that has no trailing newline.
            viplist.append(msg.rstrip("\n"))

    qlist = []
    with open("questionlist-filtered.txt", "r") as f1:
        for msg in f1:
            msg = msg.strip("\n")
            if not msg:
                break  # an empty line ends the list, as before
            print(msg)
            qlist.append(msg)

    offset = qlist.index("37344024")
#    offset = qlist.index("30014729", )
    for qid in qlist[offset:]:
        alist = []
        autlist = []
        qpath = "question/" + qid + ".txt"
        with open(qpath, "r") as qfile:
            print(qpath)
            for line in qfile:
                line = line.strip("\n")
                if not line:
                    break  # an empty line ends the answer list, as before
                ainfo = line.split(" ")
                autlist.append(ainfo[0])
                alist.append(ainfo[2])

        urlpref = "http://www.zhihu.com/question/" + qid
        # The with-block guarantees the output is flushed and closed; the
        # original `vfile.close` (no parentheses) never closed the file.
        with open("ansupv/" + qid + ".txt", "w") as vfile:
            # Pairing ids with authors keeps them aligned even when an
            # answer is skipped; a separate counter drifted after `continue`.
            for aid, author in zip(alist, autlist):
                if qid == "33554687" and aid == "56818808":
                    continue
                ans = Answer(urlpref + "/answer/" + aid)
                print(ans.answer_url)
                vlist = []
                for v in ans.get_voters():
                    if v in viplist:
                        print(len(vlist))  # running count before this match
                        print(v)
                        vlist.append(v)
                vfile.write(
                    aid + " " + str(author) + " " + str(len(vlist)) + "\n")
                for vt in vlist:
                    vfile.write(vt + "\n")

        print("finish " + qid)
def dealWithPage(page_num):
    """Dump one question and all of its answers to text files.

    The question URL is ``url_prefix`` joined with ``page_num``. On success:

      * ``page_num`` is appended to ``q_index.txt``;
      * title, detail, topics, visit count, follower count and answer count
        are written to ``Zhihu/Question_<page_num>/q.txt``, separated by
        ``|||`` lines;
      * for each answer, ``<page_num>|||<answer id>`` is appended to
        ``a_index.txt`` and the answer text, author id and upvote count are
        written to ``Zhihu/Question_<page_num>/Answer/<answer id>.txt``.

    If the question cannot be constructed, a message is printed and nothing
    is written. If fetching or writing an answer fails, its file is
    rewritten to contain only ``None``.

    :param page_num: question number appended to ``url_prefix``.
    """
    page = urlparse.urljoin(url_prefix, str(page_num))

    try:
        question = Question(page)
    except Exception:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        print('Question Get Error.')
        return

    with codecs.open('q_index.txt', 'a') as f:
        f.write(str(page_num) + '\r\n')

    question_folder = 'Zhihu/Question_' + str(page_num)
    if not os.path.exists(question_folder):
        os.makedirs(question_folder)
    with codecs.open(os.path.join(question_folder, 'q.txt'), 'w') as f:
        f.write(question.get_title() + '\r\n|||\r\n')
        f.write(question.get_detail() + '\r\n|||\r\n')
        for topic in question.get_topics():
            f.write(topic + '\t')
        f.write('\r\n|||\r\n')
        f.write(str(question.get_visit_times()) + '\r\n|||\r\n')
        f.write(str(question.get_followers_num()) + '\r\n|||\r\n')
        f.write(str(question.get_answers_num()) + '\r\n|||\r\n')

    answer_folder = os.path.join(question_folder, 'Answer')
    for answer in question.get_all_answers():
        ansURL = answer.answer_url
        ans = Answer(ansURL)
        answer_filename = str(ansURL.split('/')[-1])
        with codecs.open('a_index.txt', 'a') as f:
            f.write(str(page_num) + '|||' + answer_filename + '\r\n')
        # Created lazily so a question without answers gets no Answer folder.
        if not os.path.exists(answer_folder):
            os.makedirs(answer_folder)
        answer_path = os.path.join(answer_folder, answer_filename + '.txt')

        failed = False
        with codecs.open(answer_path, 'w') as f:
            try:
                f.write(ans.get_content().find('body').get_text().strip()
                        .encode("gbk", 'ignore') + '\r\n|||\r\n')
                f.write(ans.get_author().get_user_id() + '\r\n|||\r\n')
                f.write(str(ans.get_upvote()) + '\r\n|||\r\n')
            except Exception:
                print('TimeOut Occurred.')
                failed = True
        if failed:
            # Replace any partially written content with the failure marker.
            with codecs.open(answer_path, 'w') as f:
                f.write('None')
Exemplo n.º 6
0
 def test_nothelp_cancel_with_url(self):
     # Calls thank_cancel() on a URL-addressed answer and checks the result.
     # NOTE(review): the two assertions below cannot both hold for a dict --
     # a value equal to {"is_nothelp": False} has no "is_thanked" key. The
     # test name refers to "nothelp cancel" while the call is thank_cancel();
     # presumably a copy-paste slip -- confirm the intended API call and
     # expected payload against the Answer class.
     time.sleep(1)  # one-second pause before issuing the request
     data = Answer(
         url="https://www.zhihu.com/question/19761434/answer/14005147"
     ).thank_cancel()
     self.assertIn("is_thanked", data)
     self.assertEqual({"is_nothelp": False}, data)
Exemplo n.º 7
0
 def test_vote_neutral_with_url(self):
     """Call ``vote_neutral()`` on a URL-addressed answer and check the keys."""
     time.sleep(1)  # one-second pause before issuing the request
     target_url = "https://www.zhihu.com/question/19761434/answer/14005147"
     response = Answer(url=target_url).vote_neutral()
     for expected_key in ("voting", "voteup_count"):
         self.assertIn(expected_key, response)
Exemplo n.º 8
0
 def test_thank_with_url(self):
     # Calls thank() on a URL-addressed answer and checks the response.
     time.sleep(1)  # one-second pause before issuing the request
     data = Answer(
         url="https://www.zhihu.com/question/19761434/answer/14005147"
     ).thank()
     self.assertIn("is_thanked", data)
     # NOTE(review): this passes only if "true" is a substring of a text
     # response or a key/element of a container -- confirm what thank()
     # returns; for a dict, checking data["is_thanked"] may be what was meant.
     self.assertIn("true", data)
Exemplo n.º 9
0
    def setUpClass(cls):
        """Build the shared ``Answer`` fixture from saved test data.

        Loads ``answer.html`` and ``answer.md`` from ``TEST_DATA_PATH``,
        creates an ``Answer`` for the fixture URL, clears its ``_session``
        and installs the parsed HTML as its ``soup``. The values the tests
        compare against are stored in ``cls.expected``.
        """
        def _read_bytes(name):
            # Fixture files are read in binary mode, exactly as stored.
            with open(os.path.join(TEST_DATA_PATH, name), 'rb') as handle:
                return handle.read()

        page_soup = BeautifulSoup(_read_bytes('answer.html'))
        cls.answer_saved = _read_bytes('answer.md')

        fixture = Answer(
            'http://www.zhihu.com/question/24825703/answer/30975949')
        fixture._session = None
        fixture.soup = page_soup
        cls.answer = fixture

        cls.expected = {
            'id': 30975949,
            'aid': 7775236,
            'xsrf': 'cfd489623d34ca03adfdc125368c6426',
            'html': page_soup.prettify(),
            'author_id': 'tian-ge-xia',
            'author_name': '甜阁下',
            'question_id': 24825703,
            'question_title': '关系亲密的人之间要说「谢谢」吗?',
            'upvote_num': 1164,
            'upvoter_name': 'Mikuroneko',
            'upvoter_id': 'guo-yi-hui-23'
        }
Exemplo n.º 10
0
def answer_test(answer_url):
    """Exercise the basic accessors and exporters of ``Answer`` for one URL.

    Fetches the answered question, the author and the upvote count, exports
    the answer to txt and markdown, then prints what was fetched.

    :param answer_url: URL of the answer to inspect.
    """
    target = Answer(answer_url)

    parent_question = target.get_question()  # question this answer replies to
    writer = target.get_author()             # author of the answer
    upvote_total = target.get_upvote()       # number of upvotes received

    # Export the answer as a txt file, then as a markdown file.
    target.to_txt()
    target.to_md()

    # Single-argument print(...) produces the same output as the Python 2
    # print statement.
    print(parent_question)              # a Question object
    print(parent_question.get_title())  # the question title
    print(writer)                       # a User object
    print(writer.get_user_id())         # the author's user id
    print(upvote_total)
Exemplo n.º 11
0
def answer_test(answer_url):
    """Show the basic ``Answer`` API: question, author, upvotes and export.

    :param answer_url: URL of the answer to inspect.
    """
    ans = Answer(answer_url)

    # Read accessors first: the answered question, the author, the upvotes.
    asked = ans.get_question()
    poster = ans.get_author()
    likes = ans.get_upvote()

    # Then the exporters: a txt file followed by a markdown file.
    ans.to_txt()
    ans.to_md()

    # Single-argument print(...) produces the same output as the Python 2
    # print statement.
    print(asked)                 # a Question object
    print(asked.get_title())     # the question title
    print(poster)                # a User object
    print(poster.get_user_id())  # the author's user id
    print(likes)
Exemplo n.º 12
0
 def test_images(self):
     """Smoke test: ``images()`` on a URL-addressed answer must not raise."""
     pictured_answer = Answer(
         url="https://www.zhihu.com/question/58481349/answer/184247410")
     pictured_answer.images()
Exemplo n.º 13
0
 def test_nothelp_with_url(self):
     # Calls thank() on a URL-addressed answer and compares the whole result.
     # NOTE(review): the test name and the expected {"is_nothelp": True}
     # payload refer to "nothelp", but the call made is thank() -- presumably
     # a copy-paste slip; confirm the intended API call against the Answer
     # class.
     time.sleep(1)  # one-second pause before issuing the request
     data = Answer(
         url="https://www.zhihu.com/question/19761434/answer/14005147"
     ).thank()
     self.assertEqual({"is_nothelp": True}, data)
Exemplo n.º 14
0
 def test_vote_down_with_id(self):
     """Call ``vote_down()`` on an id-addressed answer and check the keys."""
     time.sleep(1)  # one-second pause before issuing the request
     response = Answer(id=14005147).vote_down()
     for expected_key in ("voting", "voteup_count"):
         self.assertIn(expected_key, response)
Exemplo n.º 15
0
def vote_up_with_id():
    """Call ``vote_up()`` on answer 14005147 (by id) and print the result."""
    answer = Answer(id=14005147)
    print(answer.vote_up())
Exemplo n.º 16
0
import requests.utils
import pickle
from http.cookies import SimpleCookie

from zhihu import Zhihu
from zhihu import Answer

# Manual smoke script for the Zhihu client and the Answer model. The
# commented-out calls are earlier experiments that the author marked as
# having succeeded.
zhihu = Zhihu()
# print(zhihu.cookies)
# View a user's profile -- succeeded
# profile = zhihu.profile(user_slug="xiaoxiaodouzi")
# print(profile)

# Send a private message -- succeeded
# response = zhihu.send_message(content='TESTMESSAGE', user_slug="xiaoxiaodouzi")
# print(response)

# Follow a user -- succeeded
# response = zhihu.follow(user_slug='SemitLee')
# print(response)

answer = Answer(url="https://www.zhihu.com/question/34401174/answer/389502954")
# Call images() with path="images"; the result is kept in `r` but not used.
r = answer.images(path="images")
# print(r)
# Upvote the answer -- succeeded
# NOTE(review): the note above says "upvote", but the call below is
# vote_neutral(); presumably left over from an earlier vote_up() experiment.
response = answer.vote_neutral()
answer.thank_cancel()
# print(response)


Exemplo n.º 17
0
 def test_vote_up_with_id(self):
     """Call ``vote_up()`` on an id-addressed answer and check the keys."""
     response = Answer(id=14005147).vote_up()
     for expected_key in ("voting", "voteup_count"):
         self.assertIn(expected_key, response)
Exemplo n.º 18
0
def answer_test(answer_url):
    answer = Answer(answer_url)
    # 获取该答案回答的问题
    question = answer.get_question()
    # 获取该答案的作者
    author = answer.get_author()
    # 获取该答案获得的赞同数
    upvote = answer.get_upvote()
    # 获取改该答案所属问题被浏览次数
    visit_times = answer.get_visit_times()
    # 获取所有给该答案点赞的用户信息
    voters = answer.get_voters()
    # 获取答案长度
    answer_length = answer.get_answer_length()
    # 把答案输出为txt文件
    answer.to_txt()
    # 把答案输出为markdown文件
    answer.to_md()

    print question
    # <zhihu.Question instance at 0x7f0b25d13f80>
    # 一个Question对象
    print question.get_title()  # 输出:现实可以有多美好?
    print author
    # <zhihu.User instance at 0x7f0b25425b90>
    # 一个User对象
    for voter in voters:
        print voter
        # 一个 User 对象
    print author.get_user_id()  # 输出:田浩
    print upvote  # 输出:9320
    print visit_times  # 输出: 改答案所属问题被浏览次数
    print 'answer_length', answer_length  # 输出:
Exemplo n.º 19
0
def answer_test(answer_url):
    """Walk through the ``Answer`` accessors for one URL and print the results.

    :param answer_url: URL of the answer to inspect.
    """
    ans = Answer(answer_url)

    # Read accessors: answered question, author, upvote count, visit count
    # of the owning question, and the generator of upvoting users.
    asked = ans.get_question()
    poster = ans.get_author()
    likes = ans.get_upvote()
    views = ans.get_visit_times()
    supporters = ans.get_voters()

    # Exporters: a txt file followed by a markdown file.
    ans.to_txt()
    ans.to_md()

    # Single-argument print(...) produces the same output as the Python 2
    # print statement.
    print(asked)                 # a Question object
    print(asked.get_title())     # the question title
    print(poster)                # a User object
    print(supporters)            # the generator itself, not its items
    print(poster.get_user_id())  # the author's user id
    print(likes)
    print(views)
Exemplo n.º 20
0
# -*- coding: utf-8 -*-
from zhihu import Answer

# Usage example for the Answer class: fetch metadata for one answer, export
# it to txt and markdown, and print the fetched objects.
answer_url = "http://www.zhihu.com/question/24269892/answer/29960616"
answer = Answer(answer_url)
# Get the question this answer replies to
question = answer.get_question()
# Get the author of this answer
author = answer.get_author()
# Get the number of upvotes this answer received
upvote = answer.get_upvote()
# Get the number of times the owning question has been viewed
visit_times = answer.get_visit_times()
# Get the users who upvoted this answer
voters = answer.get_voters()
# Export the answer as a txt file
answer.to_txt()
# Export the answer as a markdown file
answer.to_md()

print question
# <zhihu.Question instance at 0x7f0b25d13f80>
# a Question object
print question.get_title()  # sample output: the question's title
print author
# <zhihu.User instance at 0x7f0b25425b90>
# a User object
print voters
# <generator object get_voters at 0x7f32fbe55730>
# a generator over all users who upvoted this answer
print author.get_user_id()  # sample output: the author's user id
Exemplo n.º 21
0
# -*- coding: utf-8 -*-
from zhihu import Question
from zhihu import Answer
from zhihu import User
from zhihu import Collection
import requests

url = "https://www.zhihu.com/question/19550321/answer/14240492"
ans = Answer(url)
print ans.get_content().find('body').get_text()
input()
Exemplo n.º 22
0
from zhihu import Answer

# Usage example: call images() on a URL-addressed answer and print the result.
answer = Answer(url="https://www.zhihu.com/question/30913458/answer/193839736")
images = answer.images()
print(images)