Пример #1
0
def getLatestBestAnserwerAndSave():
    """Save the first 20 best answers of Zhihu topic 19550867.

    The client loads its token from ``token.pkl`` when that file exists;
    otherwise it runs the terminal login and writes the token to that file.
    Every best answer is wrapped with ``ansItem``, converted with
    ``ansItem2artical`` and stored through the result's ``save()``.
    """
    token_path = 'token.pkl'
    max_answers = 20

    client = ZhihuClient()
    if not os.path.isfile(token_path):
        client.login_in_terminal()
        client.save_token(token_path)
    else:
        client.load_token(token_path)

    best_answers = client.topic(19550867).best_answers
    for saved, answer in enumerate(best_answers, 1):
        ansItem2artical(ansItem(answer)).save()
        # Stop right after the last wanted answer so nothing more is pulled
        # from the answer sequence.
        if saved == max_answers:
            break
Пример #2
0
 def start(self):
     """Log in to Zhihu from the terminal and store the token.

     Creates a ``ZhihuClient``, runs ``login_in_terminal()`` and saves the
     token to ``Path.ZHIHUTOKEN``.  When ``NeedLoginException`` is raised a
     hint is printed and the process is terminated through ``sys.exit()``.
     """
     try:
         client = ZhihuClient()
         client.login_in_terminal()
         client.save_token(Path.ZHIHUTOKEN)
     except NeedLoginException:
         # The parenthesised single-argument form prints the same text under
         # Python 2 and is also valid syntax under Python 3.
         print(u"Oops, please try again.")
         sys.exit()
Пример #3
0
def LoginZhihuClient(token_name):
    """Return the account object (``client.me()``) of a logged-in client.

    The token is loaded from ``liuximing.pkl`` when that file exists;
    otherwise a terminal login is performed and the token is written there.

    NOTE(review): ``token_name`` is accepted but never read -- the token file
    name is hard-coded below.  Confirm whether that is intended.
    """
    token_path = 'liuximing.pkl'
    zhihu = ZhihuClient()
    if not os.path.isfile(token_path):
        zhihu.login_in_terminal()
        zhihu.save_token(token_path)
    else:
        zhihu.load_token(token_path)
    return zhihu.me()
Пример #4
0
 def get_client(self, reset_=0):
     """Return a ``ZhihuClient`` authenticated through ``TOKEN_FILE``.

     :param reset_: any value other than 0 forces a fresh terminal login
                    (and a token save) before the token file is consulted.
     :return: the client instance.
     """
     zhihu = ZhihuClient()
     force_login = reset_ != 0
     if force_login:
         zhihu.login_in_terminal()
         zhihu.save_token(TOKEN_FILE)
     # This check also runs after a forced login, so a token that was just
     # written above is loaded back from disk here.
     if not os.path.isfile(TOKEN_FILE):
         zhihu.login_in_terminal()
         zhihu.save_token(TOKEN_FILE)
     else:
         zhihu.load_token(TOKEN_FILE)
     return zhihu
Пример #5
0
    def login(self):
        """Return a ``ZhihuClient`` authenticated through ``token.pkl``.

        An existing token file is loaded; when it is missing, a terminal
        login is performed and the new token is saved to that file.
        """
        token_path = 'token.pkl'
        zhihu = ZhihuClient()
        if not os.path.isfile(token_path):
            zhihu.login_in_terminal()
            zhihu.save_token(token_path)
        else:
            zhihu.load_token(token_path)
        return zhihu
Пример #6
0
class Login():
    """Pair a ``ZhihuClient`` with the path of its saved token file."""

    def __init__(self):
        # The Python major version is appended to the file name,
        # e.g. ``token.pkl.3``.
        self.TOKEN_FILE = 'token.pkl.{0}'.format(sys.version_info[0])
        self.client = ZhihuClient()

    def client_login(self):
        """Authenticate ``self.client`` and return it.

        Loads the token from ``self.TOKEN_FILE`` when the file exists;
        otherwise runs the terminal login and saves the token to that file.
        """
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
            return self.client
        self.client.login_in_terminal()
        self.client.save_token(self.TOKEN_FILE)
        return self.client
Пример #7
0
def zhihu_login():
    r"""
    Log in to Zhihu.

    Loads the token from ``TOKEN_FILE_NAME`` when that file exists; otherwise
    performs a terminal login and saves the token to that file.

    :return:        the client after login
    """
    zhihu = ZhihuClient()
    if not os.path.isfile(TOKEN_FILE_NAME):
        zhihu.login_in_terminal()
        zhihu.save_token(TOKEN_FILE_NAME)
    else:
        zhihu.load_token(TOKEN_FILE_NAME)
    return zhihu
Пример #8
0
def main():
    """Crawl the answers of every user id listed in ``USER_CSV_PATH``.

    Logs in first, then reads one user id per line, crawls that user with
    ``MyCrawler`` and passes the result, cut into rows of 60 items, to
    ``save_to_csv_a``.
    """
    client = ZhihuClient()

    try:
        client.login_in_terminal(username=email_or_phone, password=password)
        # Keep the login session so that it can be reused for later logins.
        client.save_token(TOKEN_FILE)
    except NeedCaptchaException:
        # Store the captcha image, ask for its text, then log in again.
        # NOTE(review): the token is not saved on this path.
        with open('a.gif', 'wb') as captcha_file:
            captcha_file.write(client.get_captcha())
        captcha = input('请输入验证码: ')
        client.login(email_or_phone, password, captcha)

    chunk_size = 60
    flush_every = 1  # users processed between two save_to_csv_a calls
    pending_chunks = []
    users_pending = 0

    with open(USER_CSV_PATH) as file:
        for line in file:
            crawl_id = line.strip('\n')
            crawler = MyCrawler(crawl_id, client)
            print('------>>>| 待爬取的用户的知乎id为: ', crawl_id)

            answers = crawler.crawling_answer(crawl_id)
            print('该用户爬取完毕'.center(60, '*'))
            if len(answers) % chunk_size != 0:
                # Results whose length is not a multiple of 60 are dropped.
                print('无用的输出!')
            else:
                pending_chunks.extend(
                    answers[start:start + chunk_size]
                    for start in range(0, len(answers), chunk_size)
                )

            users_pending += 1
            if users_pending == flush_every:
                save_to_csv_a(pending_chunks, client)
                pending_chunks = []
                users_pending = 0

    print('全部用户采集完毕'.center(40, '*'))
Пример #9
0
from __future__ import unicode_literals, print_function

import os

from zhihu_oauth import ZhihuClient
from zhihu_oauth import SearchType

TOKEN_FILE = 'token.pkl'

client = ZhihuClient()

# Reuse the saved token when the file exists; otherwise log in from the
# terminal and save the token for the next run.
if os.path.isfile(TOKEN_FILE):
    client.load_token(TOKEN_FILE)
else:
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)

me = client.me()

# Basic profile fields of the logged-in account.
print('name', me.name)
print('headline', me.headline)
print('description', me.description)

print('following topic count', me.following_topic_count)
# Fixed: this line used to print ``following_topic_count`` a second time
# under the "following people" label.
print('following people count', me.following_count)
print('followers count', me.follower_count)

print('voteup count', me.voteup_count)
print('get thanks count', me.thanked_count)
Пример #10
0
import sys
from timeout import timeout
import os
from utils import print_err
from pymongo import MongoClient

MAX_SLEEP_TIME = 15
# The first command-line argument selects both the token file name and the
# line of the credentials list that is used for a fresh login.
Cookies_File = './cookies/cookies%s.json' % sys.argv[1]
client = ZhihuClient()
if os.path.isfile(Cookies_File):
    client.load_token(Cookies_File)
else:
    # Read the credentials list inside ``with`` so the file handle is closed
    # (it used to be left open).
    with open('./cookies/client_info_list.data') as info_file:
        client_info = info_file.readlines()
    # The selected line is split on tabs; its first two fields are passed to
    # the login call.
    client_info = client_info[int(sys.argv[1])].strip().split('\t')
    client.login_in_terminal(client_info[0], client_info[1])
    client.save_token(Cookies_File)


def get_user_questions(uname):
    global client
    if uname == '':
        return
    print(uname)

    user_questions = dict()
    try:
        people = client.people(uname)
        user_questions['_id'] = uname
        user_questions['owner'] = uname
        user_questions['questions'] = []
Пример #11
0
# coding=utf-8

from __future__ import unicode_literals, print_function

import os

from zhihu_oauth import ZhihuClient


TOKEN_FILE = 'ZHIHUTOKEN.pkl'

client = ZhihuClient()

# Without a saved token file: log in from the terminal and save the token.
# With one: just load it.
if not os.path.isfile(TOKEN_FILE):
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)
Пример #12
0
def login(username, password):
    """Create a ``ZhihuClient``, run its terminal login with the given
    credentials and return the client."""
    zhihu = ZhihuClient()
    zhihu.login_in_terminal(username, password)
    return zhihu
Пример #13
0
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException
client = ZhihuClient()
user = '******'
pwd = '6666666'
try:
    client.login_in_terminal(user, pwd)
    print(u"登陆成功!")
except NeedCaptchaException:  # a captcha is required
    # Save the captcha image, ask the user for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    # Fixed: the captcha has to go to ``login`` (as the other captcha retries
    # do); the third positional parameter of ``login_in_terminal`` is not a
    # captcha.
    client.login(user, pwd, captcha)
    print(u"登陆成功!")

client.save_token('token.pkl')  # save the token
# save_token can only be used while the client is logged in.
# Once a token file exists, the next login can simply load it:
# client.load_token('filename')
# client.login_in_terminal() # or ('*****@*****.**', 'password')
Пример #14
0
# coding=utf-8

from __future__ import unicode_literals, print_function

import os
from zhihu_oauth import ZhihuClient


TOKEN_FILE = 'token.pkl'

client = ZhihuClient()

# No saved token yet: log in from the terminal (with ``use_getpass``
# switched off) and save the token.  Otherwise load the saved token.
if not os.path.isfile(TOKEN_FILE):
    client.login_in_terminal(use_getpass=False)
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)
Пример #15
0
#!/usr/local/bin/python3

#install:
#pip install -U zhihu_oauth
import os

# Switch the working directory to the directory that contains this script,
# so that relative paths resolve next to the script.
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
try:
    client.load_token('token.pkl')
except Exception:
    # Loading the saved token failed: log in with the credentials below and
    # save a fresh token.  ``Exception`` (not a bare ``except``) is caught so
    # that Ctrl-C and ``sys.exit()`` are not swallowed.
    client.login_in_terminal('xxxxxxxxxxusernamexxxxxxxxxxxx', 'xxxxxxxxxxxpasswordxxxxxxxxxxx')
    client.save_token('token.pkl')
# Output file for the list of followed columns, written as OPML.
outfile = "已订阅专栏.xml"
with open(outfile, 'w', encoding='utf-8') as output:
    # XML declaration followed by the opening <opml> and <body> tags.
    output.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
    output.write("<opml version=\"1.0\">\n")
    output.write("	<body>\n")

    me = client.me()
    columns = me.following_columns
    for column in columns:
        # Feed URL of the column, built from the column id.
        url = "https://trtyheavef-rsshub.herokuapp.com/zhihu/zhuanlan/" + u"{}".format(column.id)
        # Disabled alternative: link to the column page itself.
        #url = 'https://zhuanlan.zhihu.com/' + u"{}".format(column.id)
Пример #16
0
class ZhiHu(object):
    """Read data through a ``ZhihuClient`` and keep questions in ``zhihu.db``."""

    TOKEN_FILE = 'token.pkl'

    def __init__(self):
        """Log in first, then open the ``zhihu.db`` database wrapper."""
        self.login_zhihu()
        self.db = EasySqlite('zhihu.db')

    def login_zhihu(self):
        """
        Create ``self.client`` and authenticate it.

        The token is loaded from ``TOKEN_FILE`` when that file exists;
        otherwise a terminal login is performed and the token is saved.
        :return: None
        """
        self.client = ZhihuClient()
        if not os.path.isfile(self.TOKEN_FILE):
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
        else:
            self.client.load_token(self.TOKEN_FILE)

    def save_quesions(self, topic_id):
        """
        Store the unanswered questions of a topic in the ``questions`` table.

        Questions with fewer than 10 answers are skipped.  The topic, every
        stored row and the outcome of every write are printed.
        :param topic_id: id handed to ``client.topic`` and stored as the last
                         column of every row
        :return: None
        """
        topic = self.client.topic(topic_id)
        print(topic)
        statement = 'replace into questions values(?,?,?,?,?,?)'
        for question in topic.unanswered_questions:
            if question.answer_count >= 10:
                row = [
                    question.id,
                    question.title,
                    question.follower_count,
                    question.answer_count,
                    question.comment_count,
                    topic_id,
                ]
                print(row)
                succeeded = self.db.update(statement, args=row)
                print('insert success!' if succeeded else 'insert error!')

    def save_answer_info(self, question_id):
        """
        Print a summary of the first answer of a question and save it.

        :param question_id: id handed to ``client.question``
        :return: None
        """
        question = self.client.question(question_id)
        print(question.title)
        for answer in question.answers:
            print(
                answer.comment_count,
                answer.excerpt,
                answer.question,
                answer.thanks_count,
                answer.voteup_count,
            )
            answer.save()
            # Only the first answer is handled.
            break

    def to_md(self, topic, file_name):
        """
        Write the stored questions of a topic to a file as a numbered list
        of links.

        The query selects at most 1000 rows ordered by follower count,
        descending.

        NOTE(review): ``topic`` is interpolated straight into the SQL text;
        use a bound parameter if ``db.query`` supports one.
        :param topic: value matched against the ``topic_id`` column
        :param file_name: path of the output file (overwritten)
        :return: None
        """
        sql = "select * from questions where topic_id = '%s' order by follower_count desc limit 1000" % topic
        rows = self.db.query(sql)
        template = "%s. [%s](https://www.zhihu.com/question/%s) 关注数:%s 回答数:%s 评论数:%s<br>\n"
        with open(file_name, 'w', encoding='utf8') as out:
            for position, item in enumerate(rows, 1):
                out.write(template % (
                    position,
                    item['title'],
                    item['id'],
                    item['follower_count'],
                    item['answer_count'],
                    item['comment_count'],
                ))
Пример #17
0
#encoding=utf8
from __future__ import unicode_literals, print_function
import os
import re
import urllib
import math
import numpy as np
from zhihu_oauth import ZhihuClient
TOKEN_FILE = 'token.pkl'
client = ZhihuClient()
# NOTE(review): account credentials are hard-coded in the login call below.
if not os.path.isfile(TOKEN_FILE):
    client.login_in_terminal('*****@*****.**', 'xhdbfs1234567')
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)
id = 24400664
question = client.question(id)
print("question: " + question.title)
print("回答数量: ", question.answer_count)
# Name of the picture directory; the os.mkdir call that would create it is
# disabled:
#os.mkdir(question.title+" pics")
path = question.title + " pics"
index = 1
# Vote counts of all answers, then their mean and standard deviation.
arr = [answer.voteup_count for answer in question.answers]
mean = np.mean(arr)
print(mean)
std = np.std(arr)
print(std)
for answer in question.answers:
    if answer.voteup_count < 2000:
Пример #18
0
def login():
    """Log in and save every answer of topic 19560072's unanswered questions.

    The token is loaded from ``token.pkl`` when that file exists; otherwise a
    terminal login is performed and the token is saved.  For each question
    its id, title and answer count are printed, and every answer is saved
    under ``Data/Gene/<question id>#<question title>`` with the file name
    ``<author id>#<author name>``.  The summed answer count is printed last.
    """
    TOKEN_FILE = 'token.pkl'
    client = ZhihuClient()

    if os.path.isfile(TOKEN_FILE):
        client.load_token(TOKEN_FILE)
    else:
        client.login_in_terminal()
        client.save_token(TOKEN_FILE)

    topic = client.topic(19560072)  # topic: genetically modified organisms
    # Other topic ids used before: 19578906 (climate change),
    # 19551296 (online games).

    answers_count = 0
    for question in topic.unanswered_questions:
        print(question.id)
        print(question.title)
        print(question.answer_count)
        answers_count += question.answer_count
        # Build the directory with os.path.join instead of hard-coded
        # backslashes: the result is the same on Windows, and on other
        # systems it becomes a real nested directory.
        save_dir = os.path.join('Data', 'Gene', str(question.id) + '#' + question.title)
        for answer in question.answers:
            print(answer.author.id, answer.author.name)
            answer.save(save_dir, str(answer.author.id) + '#' + answer.author.name)
    print("总共有{0}个回答".format(answers_count))
Пример #19
0
class zhihuspider(basespider):
    """Spider that reads the activity stream of Zhihu users.

    Configuration is taken from ``self.allConfig['zhihu']``.
    NOTE(review): ``allConfig`` and ``socialRoot`` are not set in this class;
    presumably the base-class ``loadConfig``/``prepare`` provide them.
    """

    def __init__(self):
        # Base-class setup runs first, then the overrides of this class.
        super().loadConfig()
        super().prepare()
        self.loadConfig()
        self.prepare()
        self.login()

    def loadConfig(self):
        """Read the ``zhihu`` config section and derive paths and URL templates."""
        self.config = self.allConfig['zhihu']

        self.data_path = self.socialRoot + self.config['data_path']
        self.TOKEN_FILE = self.data_path + self.config['TOKEN_FILE']
        self.friends_file = self.data_path + self.config['friends_file']

        self.url_template_question = "https://www.zhihu.com/question/%s"
        self.url_template_answer = "https://www.zhihu.com/question/%s/answer/%s"
        self.url_template_article = "https://zhuanlan.zhihu.com/p/%s"

    def prepare(self):
        """Create the data directory, load the cached name map and build the client."""
        if not os.path.isdir(self.data_path):
            os.makedirs(self.data_path)

        # ``name_map`` maps a followed user's display name to that user's id
        # (see ``followings2name_map``); it is cached as a pickle file.
        if os.path.isfile(self.friends_file):
            with open(self.friends_file, "rb") as f:
                self.name_map = pickle.load(f)
        else:
            self.name_map = dict()

        self.client = ZhihuClient()

    def login(self):
        """Authenticate ``self.client`` and store the account object in ``self.me``.

        The token is loaded from ``TOKEN_FILE`` when that file exists;
        otherwise a terminal login is performed and the token is saved.  When
        the fetched account reports ``over``, the reason is logged and a new
        terminal login is performed.
        """
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
        else:
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)

        self.me = self.client.me()
        if self.me.over:
            # Lazy %-argument: does not raise when over_reason is not a str.
            logging.error("login failed! Reason is %s", self.me.over_reason)
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
            # Fixed: fetch the account object again; the one read above was
            # obtained with the rejected login and used to stay in self.me.
            self.me = self.client.me()

    def followings2name_map(self, me):
        """Add every user followed by ``me`` to ``name_map`` and write the map to disk."""
        for peo in me.followings:
            self.name_map[peo.name] = peo.id
        with open(self.friends_file, "wb") as f:
            pickle.dump(self.name_map, f)

    def getActivities(self,
                      userid,
                      count=10,
                      timeOldest=None,
                      timeLatest=None):
        """Return recent activities of a user as a list of dicts.

        :param userid: user id (an ``int`` is converted to ``str``).  When the
            looked-up user reports ``over``, the value is treated as a display
            name and translated through ``name_map`` (refreshed from the
            followings of ``self.me`` if the name is unknown).
        :param count: the loop stops once this many entries were collected.
        :param timeOldest: optional time tuple; its first six fields build a
            ``datetime``.  Iteration stops at the first older activity.
        :param timeLatest: optional time tuple, same format.  Newer
            activities are skipped.
        :return: list of dicts with the keys ``username``, ``avatar_url``,
            ``headline``, ``time``, ``actionType``, ``summary``,
            ``targetText``, ``topics``, ``source_url`` and, when images were
            found, ``imgs``.  Empty when the user cannot be resolved.

        About ``actionType``, values include:
            CREATE_ANSWER
            CREATE_ARTICLE
            CREATE_QUESTION
            FOLLOW_QUESTION
            VOTEUP_ANSWER
        """
        def getTargetText_Topic(target, actType):
            # Returns (text, topics, url) for the supported target types and
            # empty values for everything else.  ``actType`` is not used.
            if isinstance(target, zhihu_oauth.Answer):
                return (target.content, target.question.topics,
                        self.url_template_answer %
                        (target.question.id, target.id))
            elif isinstance(target, zhihu_oauth.Question):
                return (target.detail, target.topics,
                        self.url_template_question % (target.id))
            elif isinstance(target, zhihu_oauth.Article):
                return (target.content, [],
                        self.url_template_article % (target.id))
            else:
                return ("", [], "")

        if isinstance(userid, int):
            userid = str(userid)
        backuserid = userid  # original value, kept for the error log
        dtLatest = datetime.datetime(*timeLatest[0:6]) if timeLatest else None
        dtOldest = datetime.datetime(*timeOldest[0:6]) if timeOldest else None

        pp = self.client.people(userid)
        if pp.over:
            if userid not in self.name_map:
                # Unknown name: refresh the map from the followings list.
                try:
                    self.followings2name_map(self.me)
                except Exception as e:
                    logging.error(str(e))
            if userid in self.name_map:
                userid = self.name_map[userid]
                pp = self.client.people(userid)
            if pp.over:
                return []

        activityList = []

        cnt = 0
        for act in pp.activities:
            try:
                targetInfo = getTargetText_Topic(act.target, act.type)
                entry = {
                    'username': pp.name,
                    'avatar_url': pp.avatar_url,
                    'headline': pp.headline,
                    'time': time.localtime(act.created_time),
                    'actionType': act.type,
                    'summary': act2str(act),
                    'targetText': targetInfo[0],
                    'topics': [topic.name for topic in targetInfo[1]],
                    'source_url': targetInfo[2]
                }

                # Image URLs found in the target text; an article's cover
                # image, when present, is put first.
                imglist = re.findall(r'(?<=<img src=")(.*?)(?=")',
                                     entry['targetText'])
                if isinstance(act.target,
                              zhihu_oauth.Article) and act.target.image_url:
                    imglist[0:0] = [act.target.image_url]
                if imglist:
                    entry['imgs'] = imglist

                dt = datetime.datetime(*entry['time'][0:6])
                if dtLatest and dtLatest < dt:
                    continue  # newer than the requested window
                if dtOldest and dtOldest > dt:
                    break  # older than the requested window: stop iterating
                activityList.append(entry)

                cnt += 1
                if cnt >= count:
                    break
            except Exception:
                # A failing activity is logged and skipped; the loop goes on.
                logging.error("getActivities of %s failed", backuserid)
                traceback.print_exc()

        return activityList
Пример #20
0
# -*- coding: utf-8 -*-
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

TOKEN_FILE = 'token.pkl'

client = ZhihuClient()

# Empty strings are passed for both credentials; the token is saved after
# the login call.
client.login_in_terminal('', '')
client.save_token(TOKEN_FILE)