def getLatestBestAnserwerAndSave(ans_num=20, topic_id=19550867, token_file='token.pkl'):
    """Save the first ``ans_num`` best answers of a Zhihu topic.

    A cached token is loaded from ``token_file`` when that file exists;
    otherwise an interactive terminal login is performed and the resulting
    token is written to ``token_file``.

    Each best answer is wrapped with ``ansItem``, converted with
    ``ansItem2artical`` and persisted through the resulting object's
    ``save()`` method.

    :param ans_num: maximum number of best answers to save (default 20).
        ``0`` saves nothing; a negative value raises ``ValueError``.
    :param topic_id: id of the topic whose best answers are read.
    :param token_file: path of the cached login token.
    """
    # Local import so the block does not depend on a file-level import.
    from itertools import islice

    client = ZhihuClient()
    if os.path.isfile(token_file):
        client.load_token(token_file)
    else:
        client.login_in_terminal()
        client.save_token(token_file)

    topic = client.topic(topic_id)
    # islice stops after ans_num items without requesting another one from
    # the underlying generator, matching the former counter-and-break loop.
    for answer in islice(topic.best_answers, ans_num):
        ansItem2artical(ansItem(answer)).save()
def start(self):
    """Log in to Zhihu from the terminal and store the session token.

    The token is written to ``Path.ZHIHUTOKEN``. If the library raises
    ``NeedLoginException`` a short message is printed and the process exits
    with a non-zero status so callers and shells can detect the failure.
    """
    try:
        client = ZhihuClient()
        client.login_in_terminal()
        client.save_token(Path.ZHIHUTOKEN)
    except NeedLoginException:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(u"Oops, please try again.")
        # Exit status 1: the login did not succeed.
        sys.exit(1)
    return
def LoginZhihuClient(token_name):
    """Log in to Zhihu and return the profile object of the current user.

    :param token_name: path of the token cache file. A falsy value
        (``None`` or ``''``) falls back to the historical default
        ``'liuximing.pkl'``.
    :return: the object returned by ``client.me()``.
    """
    # The argument used to be ignored in favour of a hard-coded file name;
    # honour it and keep the old name only as the fallback.
    token_file = token_name or 'liuximing.pkl'

    client = ZhihuClient()
    if os.path.isfile(token_file):
        client.load_token(token_file)
    else:
        client.login_in_terminal()
        client.save_token(token_file)
    return client.me()
def get_client(self, reset_=0):
    """Return a logged-in ``ZhihuClient``.

    :param reset_: any value other than ``0`` forces a fresh terminal login
        and overwrites ``TOKEN_FILE``, ignoring a cached token.
    :return: the client, authenticated either from the cached token or by an
        interactive login.
    """
    client = ZhihuClient()

    # Use the cached token unless a reset was requested or none exists.
    if reset_ == 0 and os.path.isfile(TOKEN_FILE):
        client.load_token(TOKEN_FILE)
        return client

    # A single login path serves both "reset" and "no token yet". The old
    # code fell through after a reset, reloading the token it had just saved
    # or prompting a second time.
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)
    return client
def login(self):
    """Return a ``ZhihuClient`` authenticated via ``token.pkl``.

    The cached token is loaded when the file exists; otherwise a terminal
    login is performed and the new token is saved to the same file.
    """
    token_path = 'token.pkl'
    zhihu = ZhihuClient()
    if not os.path.isfile(token_path):
        # No cached session yet: log in interactively and cache it.
        zhihu.login_in_terminal()
        zhihu.save_token(token_path)
    else:
        zhihu.load_token(token_path)
    return zhihu
class Login():
    """Holds a ``ZhihuClient`` and a token file named per Python major version."""

    def __init__(self):
        # The major version is appended to the file name, e.g. "token.pkl.3".
        self.TOKEN_FILE = 'token.pkl.{}'.format(sys.version_info[0])
        self.client = ZhihuClient()

    def client_login(self):
        """Authenticate ``self.client`` and return it.

        Loads the cached token when ``self.TOKEN_FILE`` exists; otherwise
        logs in from the terminal and saves the token to that file.
        """
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
        else:
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
        return self.client
def zhihu_login():
    r"""
    Log in to Zhihu.

    :return: the client after login
    """
    zhihu = ZhihuClient()
    if not os.path.isfile(TOKEN_FILE_NAME):
        # No cached token: log in from the terminal and cache the session.
        zhihu.login_in_terminal()
        zhihu.save_token(TOKEN_FILE_NAME)
    else:
        zhihu.load_token(TOKEN_FILE_NAME)
    return zhihu
def main():
    """Log in, crawl the answers of every user id in ``USER_CSV_PATH``, save them.

    Login uses the module-level ``email_or_phone`` / ``password`` values. If
    the library raises ``NeedCaptchaException`` the captcha image is written
    to ``a.gif``, the user is asked to type it, and the login is retried with
    that captcha.

    For each line of the CSV one ``MyCrawler`` is created and its
    ``crawling_answer`` result is split into chunks of 60 items. A result
    whose length is not a multiple of 60 is not stored and only a notice is
    printed. After every ``max_lines`` users the accumulated chunks are
    passed to ``save_to_csv_a`` and the buffer is cleared.
    """
    client = ZhihuClient()
    try:
        client.login_in_terminal(username=email_or_phone, password=password)
        client.save_token(TOKEN_FILE)  # keep the session for later logins
    except NeedCaptchaException:
        # Save the captcha image, ask the user for it, then log in again.
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = input('请输入验证码: ')
        client.login(email_or_phone, password, captcha)
        # The fallback path used to skip saving the session. Save it too, but
        # only when the retry actually logged in.
        if client.is_login():
            client.save_token(TOKEN_FILE)

    chunk_size = 60
    data_out_list_a = []
    line_saved = 0
    max_lines = 1

    with open(USER_CSV_PATH) as file:
        # Iterate the file lazily instead of materialising readlines().
        for line in file:
            crawl_id = line.strip('\n')
            my_crawl = MyCrawler(crawl_id, client)
            print('------>>>| 待爬取的用户的知乎id为: ', crawl_id)

            data_a = my_crawl.crawling_answer(crawl_id)
            print('该用户爬取完毕'.center(60, '*'))

            if len(data_a) % chunk_size == 0:
                data_out_list_a.extend(
                    data_a[start:start + chunk_size]
                    for start in range(0, len(data_a), chunk_size)
                )
            else:
                print('无用的输出!')

            line_saved += 1
            if line_saved == max_lines:
                save_to_csv_a(data_out_list_a, client)
                data_out_list_a = []
                line_saved = 0

    print('全部用户采集完毕'.center(40, '*'))
from __future__ import unicode_literals, print_function import os from zhihu_oauth import ZhihuClient from zhihu_oauth import SearchType TOKEN_FILE = 'token.pkl' client = ZhihuClient() if os.path.isfile(TOKEN_FILE): client.load_token(TOKEN_FILE) else: client.login_in_terminal() client.save_token(TOKEN_FILE) me = client.me() print('name', me.name) print('headline', me.headline) print('description', me.description) print('following topic count', me.following_topic_count) print('following people count', me.following_topic_count) print('followers count', me.follower_count) print('voteup count', me.voteup_count) print('get thanks count', me.thanked_count)
import sys from timeout import timeout import os from utils import print_err from pymongo import MongoClient MAX_SLEEP_TIME = 15 Cookies_File = './cookies/cookies%s.json' % sys.argv[1] global client client = ZhihuClient() if os.path.isfile(Cookies_File): client.load_token(Cookies_File) else: client_info = open('./cookies/client_info_list.data').readlines() client_info = client_info[int(sys.argv[1])].strip().split('\t') client.login_in_terminal(client_info[0], client_info[1]) client.save_token(Cookies_File) def get_user_questions(uname): global client if uname == '': return print(uname) user_questions = dict() try: people = client.people(uname) user_questions['_id'] = uname user_questions['owner'] = uname user_questions['questions'] = []
# coding=utf-8 from __future__ import unicode_literals, print_function import os from zhihu_oauth import ZhihuClient TOKEN_FILE = 'ZHIHUTOKEN.pkl' client = ZhihuClient() if os.path.isfile(TOKEN_FILE): client.load_token(TOKEN_FILE) else: client.login_in_terminal() client.save_token(TOKEN_FILE)
def login(username, password):
    """Create a ``ZhihuClient``, log it in from the terminal and return it.

    :param username: passed as the first argument of ``login_in_terminal``.
    :param password: passed as the second argument of ``login_in_terminal``.
    :return: the client object after the login call.
    """
    zhihu = ZhihuClient()
    zhihu.login_in_terminal(username, password)
    return zhihu
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

client = ZhihuClient()
user = '******'
pwd = '6666666'

try:
    client.login_in_terminal(user, pwd)
    print(u"登陆成功!")
except NeedCaptchaException:
    # A captcha is required: save the image, ask the user for the text and
    # log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    # Retry through login(), which takes the captcha as its third argument.
    # The third positional parameter of login_in_terminal() is not a
    # captcha, so the previous retry call could not submit it.
    client.login(user, pwd, captcha)
    print(u"登陆成功!")

# Save the token; this only works while the client is logged in.
client.save_token('token.pkl')

# With a saved token, later runs can load the token file directly instead of
# logging in again:
#   client.load_token('filename')
# An interactive login is also possible, with or without credentials:
#   client.login_in_terminal()  # or ('*****@*****.**', 'password')
# coding=utf-8 from __future__ import unicode_literals, print_function import os from zhihu_oauth import ZhihuClient TOKEN_FILE = 'token.pkl' client = ZhihuClient() if os.path.isfile(TOKEN_FILE): client.load_token(TOKEN_FILE) else: client.login_in_terminal(use_getpass=False) client.save_token(TOKEN_FILE)
#!/usr/local/bin/python3
# install:
#   pip install -U zhihu_oauth
import os

# Run from the script's own directory so the relative paths below
# (token.pkl and the output file) resolve next to this file.
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)

from zhihu_oauth import ZhihuClient

client = ZhihuClient()

try:
    client.load_token('token.pkl')
except Exception:
    # Narrowed from a bare "except:", which also swallowed KeyboardInterrupt
    # and SystemExit. Any failure to load the cached token still falls back
    # to a fresh login.
    client.login_in_terminal('xxxxxxxxxxusernamexxxxxxxxxxxx', 'xxxxxxxxxxxpasswordxxxxxxxxxxx')
    # The client is already logged in here, so the token is only saved; the
    # redundant reload of the file just written was dropped.
    client.save_token('token.pkl')

outfile = "已订阅专栏.xml"

with open(outfile, 'w', encoding='utf-8') as output:
    # OPML header.
    output.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
    output.write("<opml version=\"1.0\">\n")
    output.write(" <body>\n")

    me = client.me()
    columns = me.following_columns
    for column in columns:
        # Feed URL for the column, served through an RSSHub instance.
        url = "https://trtyheavef-rsshub.herokuapp.com/zhihu/zhuanlan/" + u"{}".format(column.id)
        # Alternative (disabled): link to the column page itself.
        #   url = 'https://zhuanlan.zhihu.com/' + u"{}".format(column.id)
class ZhiHu(object):
    """Zhihu helper that stores topic questions in SQLite and exports Markdown."""

    # Path of the cached login token.
    TOKEN_FILE = 'token.pkl'

    def __init__(self):
        """Log in to Zhihu, then open the ``zhihu.db`` database."""
        self.login_zhihu()
        self.db = EasySqlite('zhihu.db')

    def login_zhihu(self):
        """Create ``self.client`` and authenticate it.

        Loads the cached token when ``TOKEN_FILE`` exists; otherwise logs in
        from the terminal and saves the new token.
        """
        self.client = ZhihuClient()
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
        else:
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)

    def save_quesions(self, topic_id):
        """Save the unanswered questions of a topic.

        Questions with fewer than 10 answers are skipped. Every other
        question is written to the ``questions`` table with a
        ``replace into`` statement and the outcome is printed.

        :param topic_id: id of the topic to read.
        """
        topic = self.client.topic(topic_id)
        print(topic)
        questions = topic.unanswered_questions
        sql_tmp = 'replace into questions values(?,?,?,?,?,?)'
        for question in questions:
            if question.answer_count < 10:
                continue
            row = [question.id, question.title, question.follower_count,
                   question.answer_count, question.comment_count, topic_id]
            print(row)
            ret = self.db.update(sql_tmp, args=row)
            if not ret:
                print('insert error!')
            else:
                print('insert success!')

    def save_answer_info(self, question_id):
        """Print a summary of, and save, the first answer of a question.

        :param question_id: id of the question to read.
        """
        question = self.client.question(question_id)
        print(question.title)
        answers = question.answers
        for answer in answers:
            print(answer.comment_count, answer.excerpt, answer.question,
                  answer.thanks_count, answer.voteup_count)
            answer.save()
            # Only the first answer is processed.
            break

    def to_md(self, topic, file_name):
        """Write the stored questions of a topic to a Markdown file.

        Rows come from the ``questions`` table, ordered by follower count
        descending and limited to 1000. Each row becomes one numbered line
        linking to the question page.

        :param topic: topic id used to filter the ``questions`` table.
        :param file_name: path of the file to write (UTF-8, overwritten).
        """
        # NOTE(review): the topic id is interpolated into the SQL text. If
        # ``topic`` can come from untrusted input and ``self.db.query``
        # accepts bound parameters, switch to a parameterized query.
        sql = "select * from questions where topic_id = '%s' order by follower_count desc limit 1000" % topic
        ret = self.db.query(sql)
        line_tmp = "%s. [%s](https://www.zhihu.com/question/%s) 关注数:%s 回答数:%s 评论数:%s<br>\n"
        with open(file_name, 'w', encoding='utf8') as f:
            # enumerate replaces the hand-maintained 1-based counter.
            for i, item in enumerate(ret, start=1):
                line = line_tmp % (i, item['title'], item['id'], item['follower_count'],
                                   item['answer_count'], item['comment_count'])
                f.write(line)
#encoding=utf8 from __future__ import unicode_literals, print_function import os import re import urllib import math import numpy as np from zhihu_oauth import ZhihuClient TOKEN_FILE = 'token.pkl' client = ZhihuClient() if os.path.isfile(TOKEN_FILE): client.load_token(TOKEN_FILE) else: client.login_in_terminal('*****@*****.**', 'xhdbfs1234567') client.save_token(TOKEN_FILE) id = 24400664 question = client.question(id) print("question: " + question.title) print("回答数量: ", question.answer_count) #os.mkdir(question.title+" pics") path = question.title + " pics" index = 1 arr = [] for answer in question.answers: arr.append(answer.voteup_count) mean = np.mean(arr) print(mean) std = np.std(arr) print(std) for answer in question.answers: if answer.voteup_count < 2000:
def login():
    """Log in to Zhihu, then save every answer of a topic's unanswered questions.

    A cached token is loaded from ``token.pkl`` when present; otherwise a
    terminal login is performed and the token is saved.

    For each question yielded by ``topic.unanswered_questions`` the id, title
    and answer count are printed. Every answer is then saved under
    ``Data/Gene/<question id>#<question title>`` with the file name
    ``<author id>#<author name>``. Finally the sum of the questions'
    ``answer_count`` values is printed.
    """
    TOKEN_FILE = 'token.pkl'

    client = ZhihuClient()
    if os.path.isfile(TOKEN_FILE):
        client.load_token(TOKEN_FILE)
    else:
        client.login_in_terminal()
        client.save_token(TOKEN_FILE)

    topic = client.topic(19560072)  # topic "转基因" (genetically modified)
    # Other topics that were tried:
    #   client.topic(19578906)  # "气候变化" (climate change)
    #   client.topic(19551296)  # "网络游戏" (online games)

    answers_count = 0
    for question in topic.unanswered_questions:
        print(question.id)
        print(question.title)
        print(question.answer_count)
        answers_count += question.answer_count

        # os.path.join yields the same 'Data\\Gene\\...' path on Windows and
        # a valid nested path elsewhere; the old backslash concatenation was
        # Windows-only.
        target_dir = os.path.join('Data', 'Gene', str(question.id) + '#' + question.title)
        for answer in question.answers:
            print(answer.author.id, answer.author.name)
            answer.save(target_dir, str(answer.author.id) + '#' + answer.author.name)

    print("总共有{0}个回答".format(answers_count))
class zhihuspider(basespider):
    """Spider that reads a Zhihu user's recent activities through ``ZhihuClient``."""

    def __init__(self):
        # Run the base-class setup first, then this class's own setup, and
        # finally authenticate.
        super().loadConfig()
        super().prepare()
        self.loadConfig()
        self.prepare()
        self.login()

    def loadConfig(self):
        """Read the ``'zhihu'`` section of ``self.allConfig`` and derive paths and URL templates."""
        self.config = self.allConfig['zhihu']
        self.data_path = self.socialRoot + self.config['data_path']
        self.TOKEN_FILE = self.data_path + self.config['TOKEN_FILE']
        self.friends_file = self.data_path + self.config['friends_file']
        self.url_template_question = "https://www.zhihu.com/question/%s"
        self.url_template_answer = "https://www.zhihu.com/question/%s/answer/%s"
        self.url_template_article = "https://zhuanlan.zhihu.com/p/%s"

    def prepare(self):
        """Ensure the data directory exists, load the cached name map, create the client."""
        if not os.path.isdir(self.data_path):
            os.makedirs(self.data_path)

        if not os.path.isfile(self.friends_file):
            self.name_map = dict()
        else:
            # NOTE(review): pickle.load must only be used on trusted files.
            with open(self.friends_file, "rb") as cache:
                self.name_map = pickle.load(cache)

        self.client = ZhihuClient()

    def login(self):
        """Authenticate ``self.client`` and store the current user in ``self.me``.

        Uses the cached token when ``self.TOKEN_FILE`` exists, otherwise logs
        in from the terminal. If ``self.me.over`` is set afterwards, the
        reason is logged and a new terminal login is performed and saved.
        """
        if not os.path.isfile(self.TOKEN_FILE):
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
        else:
            self.client.load_token(self.TOKEN_FILE)

        self.me = self.client.me()
        if self.me.over:
            logging.error("login failed! Reason is " + self.me.over_reason)
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)

    def followings2name_map(self, me):
        """Record name -> id for everyone ``me`` follows and pickle the map to ``self.friends_file``."""
        for person in me.followings:
            self.name_map[person.name] = person.id
        with open(self.friends_file, "wb") as cache:
            pickle.dump(self.name_map, cache)

    def getActivities(self, userid, count=10, timeOldest=None, timeLatest=None):
        """Return up to ``count`` activity entries of a user as a list of dicts.

        Values seen for ``actionType``:
            CREATE_ANSWER
            CREATE_ARTICLE
            CREATE_QUESTION
            FOLLOW_QUESTION
            VOTEUP_ANSWER

        :param userid: user id (an ``int`` is converted to ``str``). When the
            lookup reports ``over``, the value is also tried as a key of
            ``self.name_map``.
        :param count: stop once this many entries have been collected.
        :param timeOldest: optional time tuple; iteration stops at the first
            activity older than it.
        :param timeLatest: optional time tuple; activities newer than it are
            skipped.
        :return: list of entry dicts; ``[]`` when the user cannot be resolved.
        """
        def describe_target(target, actType):
            # Returns (text, topics, source url) for the activity target.
            if isinstance(target, zhihu_oauth.Answer):
                return (target.content, target.question.topics,
                        self.url_template_answer % (target.question.id, target.id))
            if isinstance(target, zhihu_oauth.Question):
                return (target.detail, target.topics,
                        self.url_template_question % (target.id))
            if isinstance(target, zhihu_oauth.Article):
                return (target.content, [],
                        self.url_template_article % (target.id))
            return ("", [], "")

        if isinstance(userid, int):
            userid = str(userid)
        # Keep the caller's id for the error message; userid may be remapped.
        backuserid = userid

        dtLatest = datetime.datetime(*timeLatest[0:6]) if timeLatest else None
        dtOldest = datetime.datetime(*timeOldest[0:6]) if timeOldest else None

        pp = self.client.people(userid)
        if pp.over:
            # Refresh the name map from the followings when the key is
            # unknown, then retry the lookup with the mapped id.
            if userid not in self.name_map:
                try:
                    self.followings2name_map(self.me)
                except Exception as e:
                    logging.error(str(e))
            if userid in self.name_map:
                userid = self.name_map[userid]
                pp = self.client.people(userid)
            if pp.over:
                return []

        activityList = []
        for act in pp.activities:
            try:
                text, topics, source_url = describe_target(act.target, act.type)
                entry = {
                    'username': pp.name,
                    'avatar_url': pp.avatar_url,
                    'headline': pp.headline,
                    'time': time.localtime(act.created_time),
                    'actionType': act.type,
                    'summary': act2str(act),
                    'targetText': text,
                    'topics': [topic.name for topic in topics],
                    'source_url': source_url
                }

                imglist = re.findall(r'(?<=<img src=")(.*?)(?=")', entry['targetText'])
                if isinstance(act.target, zhihu_oauth.Article) and act.target.image_url:
                    # The article's own image goes first.
                    imglist.insert(0, act.target.image_url)
                if imglist:
                    entry['imgs'] = imglist

                dt = datetime.datetime(*entry['time'][0:6])
                if dtLatest and dtLatest < dt:
                    # Newer than the requested window: skip, keep scanning.
                    continue
                if dtOldest and dtOldest > dt:
                    # Older than the requested window: stop scanning.
                    break

                activityList.append(entry)
                if len(activityList) >= count:
                    break
            except Exception as e:
                # A failing activity is logged and skipped; the loop goes on.
                logging.error("getActivities of " + backuserid + " failed")
                traceback.print_exc()

        return activityList
# -*- coding: utf-8 -*-
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

# Log in from the terminal with empty credentials, then cache the session
# token in token.pkl.
# NOTE(review): empty strings presumably make the library prompt for the
# credentials; confirm against the zhihu_oauth documentation.
client = ZhihuClient()
client.login_in_terminal(username='', password='')
client.save_token('token.pkl')