示例#1
0
def answer_test(answer_url):
    answer = Answer(answer_url)
    # 获取该答案回答的问题
    question = answer.get_question()
    # 获取该答案的作者
    author = answer.get_author()
    # 获取该答案获得的赞同数
    upvote = answer.get_upvote()
    # 获取改该答案所属问题被浏览次数
    visit_times = answer.get_visit_times()
    # 获取所有给该答案点赞的用户信息
    voters = answer.get_voters()
    # 把答案输出为txt文件
    answer.to_txt()
    # 把答案输出为markdown文件
    answer.to_md()

    print question
    # <zhihu.Question instance at 0x7f0b25d13f80>
    # 一个Question对象
    print question.get_title()  # 输出:现实可以有多美好?
    print author
    # <zhihu.User instance at 0x7f0b25425b90>
    # 一个User对象
    print voters # <generator object get_voters at 0x7f32fbe55730>(代表所有该答案点赞的用户的生成器)
    print author.get_user_id()  # 输出:田浩
    print upvote  # 输出:9320
    print visit_times  # 输出: 改答案所属问题被浏览次数
def answer_test():
    start = time.time()
    answer = Answer('http://www.zhihu.com/question/33488763/answer/56619442')
    answer.parser()
    print "Author is ", answer.get_author()
    print "Question url is ", answer.get_question_url()
    print "Question text is ", answer.get_question_text()
    print "Number of vote is ", answer.get_vote_num()
    print "Answer content is ", answer.get_content()
    print "Number of comment is ", answer.get_comment_num()
    print "Number of view is ", answer.get_view_num()
    print "Number of time being collected is ", answer.get_collected_num()
    print "Created time is ", answer.get_created_time()
    print "Last modified time is ", answer.get_last_modified_time()
    answer.save_answer_to_file()
    answer.save_all_comments()
    print "Answer robot vote rate is {0}".format(answer.save_all_voters_profile())  # might be time-consuming
    end = time.time()
    print "Time used is", end - start
示例#3
0
    def run(self):
        """Store the question at ``self.url`` together with all its answers.

        The whole job runs inside ``self.threadingSum`` (presumably a
        semaphore limiting concurrent workers -- confirm against the class
        constructor). If the database does not already contain the question,
        this method:

        1. inserts the question (title, detail, follower count, answer count
           and a ``;``-separated tag string);
        2. for every answer link, inserts the answer (author and votes),
           inserts each picture URL of that answer, and hands the answer
           content to ``self.storeTheAnswer``.

        The database handler is always closed, including when the question
        already exists or when any step raises; exceptions still propagate
        to the caller.
        """
        with self.threadingSum:
            logging.debug("%s start", self.url)
            dbHandler = DbHandler()
            try:
                if not dbHandler.hasQuestion(self.url):
                    # Insert the new question.
                    question = Question(self.url)
                    title = question.get_title()
                    detail = question.get_detail()
                    answerNum = question.get_answer_num()
                    followersNum = question.get_followers_num()
                    # Tags are stored as one ';'-separated string.
                    tags = ";".join(question.get_tags())
                    questionDict = {
                        "url": self.url,
                        "title": title,
                        "detail": detail,
                        "followers": followersNum,
                        "answerNum": answerNum,
                        "tags": tags,
                    }
                    dbHandler.insertNewQuestion(questionDict)

                    # The id is read back after the insert so the answers
                    # can reference their question.
                    zh_qid = dbHandler.getQueIdByUrl(self.url)

                    # Insert the new answers.
                    for answer_link in question.get_all_answer_link():
                        answer = Answer(answer_link)
                        author = answer.get_author()
                        votes = answer.get_votes()
                        answerDict = {
                            "url": answer_link,
                            "author": author,
                            "zh_qid": zh_qid,
                            "votes": votes,
                        }
                        dbHandler.insertNewAnswer(answerDict)

                        # Insert the picture URLs, keyed by the answer id
                        # read back after the insert.
                        zh_aid = dbHandler.getAnsIdByUrl(answer_link)
                        for imgUrl in answer.get_all_pics():
                            dbHandler.insertNewImgUrl(zh_aid, imgUrl)

                        contents = answer.get_answer_content()
                        self.storeTheAnswer(zh_aid, contents)
            finally:
                # Previously the handler was only closed on the new-question
                # path; close it on every path so it is never leaked.
                dbHandler.close()

            logging.debug("%s done", self.url)
示例#4
0
# Demo script: walks Question -> Answer -> Voters -> User, printing what each
# accessor returns. `question_url` must already be defined before this runs.
questio_test = Question(question_url)
print "get_title:\t", questio_test.get_title()
print "get_detail:\t", questio_test.get_detail()
print "get_answers_num:\t", questio_test.get_answers_num()
print "get_followers_num:\t", questio_test.get_followers_num()
print "get_topics:\t", questio_test.get_topics()
# get_all_answers() yields an iterator; only its first item is shown here.
print "get_all_answers:\t", questio_test.get_all_answers().next()
print "get_visit_times:\t", questio_test.get_visit_times()


# Visual separator: 20 blank lines between the sections of the output.
print "\n" * 20

# A fresh get_all_answers() iterator is created here, and its first item is
# what Answer is built from (presumably an answer URL -- confirm).
answer_test = Answer(questio_test.get_all_answers().next())

print "get_author\t", answer_test.get_author()
print "get_upvote\t", answer_test.get_upvote()
print "get_content\t", answer_test.get_content()
print "get_answerid\t", answer_test.get_answerid()

print "\n" * 20
# Voters is constructed from the answer id obtained above.
voters_test = Voters(answer_test.get_answerid())
print "get_voters", voters_test.get_voters().next()


# The first item from a fresh get_voters() iterator is unpacked into User's
# positional arguments, so each item must be a sequence of constructor args.
user_test = User(*voters_test.get_voters().next())

print "\n" * 20
print "get_user_id\t", user_test.get_user_id()
# print "get_data_id\t", user_test.get_data_id()
print "get_gender\t", user_test.get_gender()