Пример #1
0
        def parse_author(command):
            result = Match.author(command)
            author_id = result.group('author_id')
            task = SingleTask()
            task.kind = 'author'
            task.spider.href = 'https://www.zhihu.com/people/{}'.format(author_id)
            task.book.kind = 'author'

            client = ZhihuClient()
            try:
                client.load_token(Path.pwd_path + str(u'/ZHIHUTOKEN.pkl'))
            except IOError:
                print u"没有找到登录信息文件,请先登录"
                sys.exit()
            except NeedLoginException:
                print u"登录信息过期,请重新登录"
                sys.exit()
            people_oauth = client.people(author_id)
            _ = people_oauth.follower_count    # zhihu-oauth, issues #4
            author_id_hash = people_oauth.id
            task.book.sql.info = 'select * from AuthorInfo where author_id = "{}"'.format(author_id_hash)
            task.book.sql.question = 'select * from Question where question_id in (select question_id from \
            Answer where author_id = "{}")'.format(author_id_hash)
            task.book.sql.answer = 'select * from Answer where author_id = "{}"'.format(author_id_hash)
            return task
Пример #2
0
def crawling(id):
    """Download the images found in every answer of a Zhihu question.

    Images are saved into a directory named after the question title, as
    ``<author name>_<n>.jpg`` where ``n`` counts images across all answers.
    A file that already exists is not downloaded again.

    :param id: id of the question
    """
    client = ZhihuClient()
    client.load_token('token.pkl')  # log in with the saved token file
    question = client.question(id)
    print(u"问题:", question.title)
    print(u"回答数量:", question.answer_count)

    path = question.title
    if not os.path.exists(path):
        os.mkdir(path)

    # Loop-invariant, so it is compiled once instead of once per answer.
    img_pattern = re.compile(
        r'<img src="(https://pic\d\.zhimg\.com/.*?\.(jpg|png))".*?>')

    index = 1  # running image number across all answers
    for i, answer in enumerate(question.answers):
        content = answer.content
        author_name = answer.author.name
        for img in img_pattern.findall(content):
            img_url = img[0]  # group 1: full url, group 2: extension
            # NOTE(review): the name always ends in .jpg, even for png urls.
            image_name = author_name + '_' + str(index) + '.jpg'
            target = os.path.join(path, image_name)
            if not os.path.exists(target):
                urllib.request.urlretrieve(img_url, target)
                print(u"成功保存第%d张图片:%s,当前总进度%.2f%%" %
                      (index, image_name, i / question.answer_count * 100))
            index += 1
        # This answer is finished: report it 1-based and count it as done,
        # so the first answer is "1" and the last one reaches 100%.
        print('第%d个答案爬取完成,当前总进度%.2f%%' %
              (i + 1, (i + 1) / question.answer_count * 100))
Пример #3
0
def download(cid):
    """Save every article of a Zhihu column to a local directory.

    A directory named after the column title is created with an ``images``
    sub-directory. Each article is processed by ``dealArticle``, its images
    are fetched by ``downloadImg``, image host prefixes in the content are
    rewritten to ``./images/`` and the content is written to a file named
    after the article title.

    :param cid: id of the column
    """
    client = ZhihuClient()
    client.load_token('token.pk1')  # log in with the saved token
    column = client.column(cid)
    images_dir = os.path.join(column.title, 'images')
    # Dots are escaped so that only the literal host name is matched.
    regex = re.compile(r"https://pic\d\.zhimg\.com/", re.IGNORECASE)

    # makedirs also creates the column directory when it is missing.
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    for index, article in enumerate(column.articles):
        # process the article
        article_f = dealArticle(article)

        # download its images
        downloadImg(article_f['content'], images_dir)

        # point image urls at the local copies
        article_f['content'] = regex.sub('./images/', article_f['content'])

        # write the result
        with open(os.path.join(column.title, article_f['title']), 'w+') as f:
            f.write(article_f['content'])

        print('[%s] download %s success!' % (str(index), article.title))
Пример #4
0
    def prepare(self):
        """Create the data directory, load the cached name map and a client.

        ``self.name_map`` is unpickled from ``self.friends_file`` when that
        file exists, otherwise it starts as an empty dict.
        """
        if not os.path.isdir(self.data_path):
            os.makedirs(self.data_path)

        if not os.path.isfile(self.friends_file):
            self.name_map = dict()
        else:
            # NOTE: pickle must only be used on files this program wrote.
            with open(self.friends_file, "rb") as cache:
                self.name_map = pickle.load(cache)

        self.client = ZhihuClient()
Пример #5
0
 def __init__(self, dbname, email, key):
     """Open the SQLite database and try to log in to Zhihu.

     :param dbname: database path passed to ``sqlite3.connect``
     :param email: account name passed to ``ZhihuClient.login``
     :param key: password passed to ``ZhihuClient.login``
     """
     self.con = sqlite3.connect(dbname)
     self.cursor = self.con.cursor()
     self.zhclient = ZhihuClient()
     try:
         self.zhclient.login(email, key)
     except NeedCaptchaException:
         # Best effort: report the problem and keep the client as it is.
         print("需要输入验证码,账号 %s 可能已失效" %(email))
Пример #6
0
 def login_zhihu(self):
     """
     Log in to Zhihu.

     Reuses the token stored in ``self.TOKEN_FILE`` when that file exists;
     otherwise logs in interactively and saves the new token there.
     :return:
     """
     self.client = ZhihuClient()
     if not os.path.isfile(self.TOKEN_FILE):
         self.client.login_in_terminal()
         self.client.save_token(self.TOKEN_FILE)
         return
     self.client.load_token(self.TOKEN_FILE)
Пример #7
0
def main():
    """Log in to Zhihu with a captcha, then fetch the data and write the outputs."""
    # login
    client = ZhihuClient()
    log_in(client)
    # NOTE(review): presumably shows the captcha image saved by log_in() — confirm.
    Image('./a.gif')
    captcha = input('please input captcha:')
    client.login('account', 'psw', captcha)

    get_data(client)
    write_hk_student_info()
    write_modules()
Пример #8
0
def LoginZhihuClient(token_name):
    """Return the logged-in user (``client.me()``).

    The token is read from, or after an interactive login saved to,
    the fixed file 'liuximing.pkl'.

    :param token_name: accepted but not used by this function
    """
    TOKEN_FILE = 'liuximing.pkl'
    client = ZhihuClient()
    if not os.path.isfile(TOKEN_FILE):
        client.login_in_terminal()
        client.save_token(TOKEN_FILE)
    else:
        client.load_token(TOKEN_FILE)
    return client.me()
Пример #9
0
def getLatestBestAnserwerAndSave():
    """Save up to 20 best answers of topic 19550867.

    Logs in with the token in 'token.pkl' (creating it through an
    interactive login when the file is missing), then converts each answer
    with ``ansItem`` and ``ansItem2artical`` and calls ``save()`` on the
    result.
    """
    from itertools import islice

    ans_num = 20  # maximum number of answers to save

    TOKEN_FILE = 'token.pkl'
    client = ZhihuClient()

    if os.path.isfile(TOKEN_FILE):
        client.load_token(TOKEN_FILE)
    else:
        client.login_in_terminal()
        client.save_token(TOKEN_FILE)

    java = client.topic(19550867)
    # islice stops after ans_num items without requesting any further ones.
    for answ in islice(java.best_answers, ans_num):
        ansItem2artical(ansItem(answ)).save()
Пример #10
0
def login():
    """Return a ZhihuClient loaded with the token stored at a fixed absolute path."""
    client = ZhihuClient()
    # Disabled password login, kept for reference:
    # try:
    #     client.login('username', 'password')
    # except NeedCaptchaException:
    #     # save the captcha image, prompt for it, and log in again
    #     with open('a.gif', 'wb') as f:
    #         f.write(client.get_captcha())
    #     captcha = input('please input captcha:')
    #     client.login('email_or_phone', 'password', captcha)
    # client.save_token('token.pkl')
    client.load_token('/Users/huangyukun/scripts/token.pkl')

    return client
Пример #11
0
def zhihu_login():
    """Return a logged-in ZhihuClient and print the account name.

    The token in TOKEN_FILE is reused when the file exists; otherwise the
    client logs in with the credentials below and saves the token.
    """
    client = ZhihuClient()

    if os.path.isfile(TOKEN_FILE):
        client.load_token(TOKEN_FILE)
    else:
        # NOTE(review): credentials are hard-coded in the source.
        client.login('*****@*****.**', 'a4906639')
        client.save_token(TOKEN_FILE)
    me = client.me()
    print(me.name)
    return client
    def setUp(self):
        """Create ``self.client`` from test/token.pkl, or skip the test.

        The test is skipped when the token file is missing or when loading
        it raises ValueError.
        """
        super(ZhihuClientClassTest, self).setUp()

        token_path = 'test/token.pkl'

        # No 'test' directory here but a token file is present: step up one
        # level (presumably the run started inside the test directory).
        needs_step_up = (
            not os.path.isdir('test') and os.path.isfile('token.pkl'))
        if needs_step_up:
            os.chdir('..')

        if not os.path.isfile(token_path):
            print('\nno token file, skip all tests.')
            self.skipTest('no token file.')

        self.client = ZhihuClient()

        try:
            self.client.load_token(token_path)
        except ValueError:
            print('\ntoken version not math python version, skip all tests.')
            self.skipTest('token version not math python version.')
Пример #13
0
def login(username, password):
    """Log in to Zhihu and save the token to 'token.pkl'.

    When the first attempt raises NeedCaptchaException, the captcha image is
    written to 'a.gif', the user is asked to type it, and the login is
    repeated with that captcha.
    """
    from zhihu_oauth import ZhihuClient
    from zhihu_oauth.exception import NeedCaptchaException

    zhihu = ZhihuClient()
    try:
        zhihu.login(username, password)
    except NeedCaptchaException:  # a captcha is required
        with open('a.gif', 'wb') as captcha_file:
            captcha_file.write(zhihu.get_captcha())
        captcha = input('please input captcha:')
        zhihu.login(username, password, captcha)
    else:
        print(u"登陆成功!")
    zhihu.save_token('token.pkl')  # keep the token for later runs
Пример #14
0
class TopicTree:
    """Walk a Zhihu topic and its children, appending one JSON line per topic."""

    # Created once at class-definition time, so all instances share it.
    client = ZhihuClient()

    def __init__(self):
        pass

    def login(self):
        """Load the token from TOKEN_FILE, or log in and save a new one."""
        if os.path.isfile(TOKEN_FILE):
            self.client.load_token(TOKEN_FILE)
        else:
            self.client.login_in_terminal(username='******',
                                          password=self.getpass())
            self.client.save_token(TOKEN_FILE)

    def login_next(self):
        """Log in again without touching the token file."""
        self.client.login_in_terminal(username='******',
                                      password=self.getpass())

    def test(self):
        """Print the name of the logged-in account."""
        me = self.client.me()
        print('name', me.name)

    def get_topic(self, uid):
        """Append topic ``uid`` with its children to ``out_path``, then recurse.

        Each child is visited with a random pause of under one second after
        it. When visiting a child raises, the traceback is printed and the
        child is retried through ``run()``, which logs in again first.
        """
        topic = self.client.topic(uid)

        topic_dic = {}

        topic_dic['id'] = uid
        topic_dic['name'] = topic.name
        topic_dic['children'] = [{
            'id': item._id,
            'name': item.name
        } for item in topic.children]

        # one JSON document per line, appended to the output file
        with open(out_path, 'a', encoding='utf-8') as fw:
            json.dump(topic_dic, fw, ensure_ascii=False, sort_keys=True)
            fw.write('\n')

        for item in topic_dic['children']:
            cid = item['id']
            try:
                self.get_topic(cid)
                time.sleep(random.random())
            except Exception as e:
                traceback.print_exc()

                # NOTE(review): the retry is not bounded; a child that keeps
                # failing is retried again from inside run() -> get_topic().
                print('ban: ', cid)
                self.run(cid)

    def run(self, uid):
        """Log in again and walk the tree starting at ``uid``."""
        self.login_next()
        self.get_topic(uid)

    def getpass(self):
        """Return the stripped content of the password file."""
        # with open('C:\\Users\\BaoQiang\\Desktop\\password.txt', 'r') as f:
        with open('/mnt/home/baoqiang/password.txt', 'r') as f:
            return f.read().strip()
Пример #15
0
 def Analyse(self):
     """Run the steps of ``self.aa`` in order, printing a marker after each one."""
     client = ZhihuClient()
     self.aa.denglu(client)
     self.aa.findquestion(client)
     print'------------find','\n'
     list_q = self.aa.Analyse_question(self.aa.dic_name)
     print'----------q','\n'
     self.aa.Analyse_answer(list_q)  # the slowest step
     print'-------------a','\n'
Пример #16
0
    def login(self):
        """Return a ZhihuClient authenticated through 'token.pkl'.

        A missing token file triggers an interactive login whose token is
        then saved to that file.
        """
        TOKEN_FILE = 'token.pkl'

        zhihu = ZhihuClient()

        if not os.path.isfile(TOKEN_FILE):
            zhihu.login_in_terminal()
            zhihu.save_token(TOKEN_FILE)
        else:
            zhihu.load_token(TOKEN_FILE)
        return zhihu
Пример #17
0
def zhihu_login():
    """Return a logged-in ZhihuClient after printing the account name.

    Uses the token in TOKEN_FILE when the file exists; otherwise logs in
    with the credentials written in this function and saves the token.
    """
    client = ZhihuClient()

    if not os.path.isfile(TOKEN_FILE):
        # NOTE(review): credentials are hard-coded in the source.
        client.login('*****@*****.**', 'a4906639')
        client.save_token(TOKEN_FILE)
    else:
        client.load_token(TOKEN_FILE)

    print(client.me().name)
    return client
Пример #18
0
    def setUp(self):
        """Create ``self.client`` from the token under 'test', or skip.

        The test is skipped when the token file is missing or when loading
        it raises ValueError.
        """
        super(ZhihuClientClassTest, self).setUp()

        # No 'test' directory here but a token file is present: step up one
        # level (presumably the run started inside the test directory).
        needs_step_up = (
            not os.path.isdir('test') and os.path.isfile(TOKEN_FILE_NAME))
        if needs_step_up:
            os.chdir('..')

        token_file_path = os.path.join('test', TOKEN_FILE_NAME)
        token_missing = not os.path.isfile(token_file_path)
        if token_missing:
            print('\nno token file, skip all tests.')
            self.skipTest('no token file.')

        self.client = ZhihuClient()

        try:
            self.client.load_token(token_file_path)
        except ValueError:
            print('\ntoken version not math python version, skip all tests.')
            self.skipTest('token version not math python version.')
Пример #19
0
class ZhihuClientClassTest(unittest.TestCase):
    """Base test case whose setUp provides a token-loaded ``self.client``."""

    def setUp(self):
        """Load test/token.pkl into a new client, or skip the test.

        The test is skipped when the token file is missing or when loading
        it raises ValueError.
        """
        super(ZhihuClientClassTest, self).setUp()

        token_path = 'test/token.pkl'

        # No 'test' directory here but a token file is present: step up one
        # level (presumably the run started inside the test directory).
        needs_step_up = (
            not os.path.isdir('test') and os.path.isfile('token.pkl'))
        if needs_step_up:
            os.chdir('..')

        if not os.path.isfile(token_path):
            print('\nno token file, skip all tests.')
            self.skipTest('no token file.')

        self.client = ZhihuClient()

        try:
            self.client.load_token(token_path)
        except ValueError:
            message = '\ntoken version not math python version, skip all tests.'
            print(message)
            self.skipTest('token version not math python version.')
Пример #20
0
def zhihu_login():
    """
    Log in to Zhihu.

    The token in TOKEN_FILE_NAME is loaded when the file exists; otherwise
    an interactive login is performed and its token saved to that file.
    :return:        the client after logging in
    """
    zhihu = ZhihuClient()
    if not os.path.isfile(TOKEN_FILE_NAME):
        zhihu.login_in_terminal()
        zhihu.save_token(TOKEN_FILE_NAME)
    else:
        zhihu.load_token(TOKEN_FILE_NAME)
    return zhihu
Пример #21
0
def download(uid):
    """Save every answer written by one user into a directory named ``uid``.

    For each answer the images are fetched with ``downloadImg``, the text is
    processed by ``dealArticle`` and the resulting content is written to a
    file named after the processed title.

    :param uid: id of the user; also the name of the output directory
    """
    client = ZhihuClient()
    client.load_token('token.pk1')  # log in with the saved token
    author = client.people(uid)

    for position, answer in enumerate(author.answers):
        # images first, then the text
        downloadImg(answer.content)
        processed = dealArticle(answer)

        # the directory is created on demand, right before the first write
        if not os.path.exists(uid):
            os.makedirs(uid)

        target = os.path.join(uid, processed['title'])
        with open(target, 'w+') as out:
            out.write(processed['content'])

        print('[%s] download %s success!' %
              (str(position), answer.question.title))
Пример #22
0
class ZhihuClientClassTest(unittest.TestCase):
    """Base test case whose setUp provides a token-loaded ``self.client``."""

    def setUp(self):
        """Load the token under 'test' into a new client, or skip the test.

        The test is skipped when the token file is missing or when loading
        it raises ValueError.
        """
        super(ZhihuClientClassTest, self).setUp()

        # No 'test' directory here but a token file is present: step up one
        # level (presumably the run started inside the test directory).
        needs_step_up = (
            not os.path.isdir('test') and os.path.isfile(TOKEN_FILE_NAME))
        if needs_step_up:
            os.chdir('..')

        token_file_path = os.path.join('test', TOKEN_FILE_NAME)
        token_missing = not os.path.isfile(token_file_path)
        if token_missing:
            print('\nno token file, skip all tests.')
            self.skipTest('no token file.')

        self.client = ZhihuClient()

        try:
            self.client.load_token(token_file_path)
        except ValueError:
            message = '\ntoken version not math python version, skip all tests.'
            print(message)
            self.skipTest('token version not math python version.')
Пример #23
0
def main():
    """Log in, crawl the answers of every user in USER_CSV_PATH and save them.

    Each user's result is split into rows of 60 items when its length is a
    multiple of 60; otherwise it is dropped with a message. After every
    ``max_lines`` users the collected rows go to ``save_to_csv_a``.
    """
    client = ZhihuClient()

    try:
        client.login_in_terminal(username=email_or_phone, password=password)
        client.save_token(TOKEN_FILE)  # keep the session for later logins
    except NeedCaptchaException:
        # save the captcha image, ask for its text, and log in again
        with open('a.gif', 'wb') as captcha_file:
            captcha_file.write(client.get_captcha())
        captcha = input('请输入验证码: ')
        client.login(email_or_phone, password, captcha)

    row_size = 60
    max_lines = 1
    pending_rows = []
    users_done = 0

    with open(USER_CSV_PATH) as user_file:
        for raw_line in user_file:
            crawl_id = raw_line.strip('\n')
            crawler = MyCrawler(crawl_id, client)
            print('------>>>| 待爬取的用户的知乎id为: ', crawl_id)

            answers = crawler.crawling_answer(crawl_id)
            print('该用户爬取完毕'.center(60, '*'))
            if len(answers) % row_size != 0:
                print('无用的输出!')
            else:
                for start in range(0, len(answers), row_size):
                    pending_rows.append(answers[start:start + row_size])

            users_done += 1
            if users_done == max_lines:
                save_to_csv_a(pending_rows, client)
                pending_rows = []
                users_done = 0

    print('全部用户采集完毕'.center(40, '*'))
Пример #24
0
 def start(self):
     """Log in interactively and save the token to ``Path.ZHIHUTOKEN``.

     The process exits when NeedLoginException is raised.
     """
     try:
         client = ZhihuClient()
         client.login_in_terminal()
         client.save_token(Path.ZHIHUTOKEN)
     except NeedLoginException:
         print u"Oops, please try again."
         sys.exit()
     return
Пример #25
0
class Login():
    """Holds a ZhihuClient and a token file named per Python major version."""

    def __init__(self):
        # e.g. 'token.pkl.3' when running on Python 3
        major_version = sys.version_info[0]
        self.TOKEN_FILE = 'token.pkl.' + str(major_version)
        self.client = ZhihuClient()

    def client_login(self):
        """Return the client after loading the token or logging in.

        An existing token file is loaded; otherwise an interactive login is
        performed and its token saved to ``self.TOKEN_FILE``.
        """
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
        else:
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
        return self.client
Пример #26
0
def main():
    """Log in, crawl the answers of every user in USER_TRY_CSV_PATH, save them.

    Each user's result is split into rows of 60 items when its length is a
    multiple of 60; otherwise it is dropped with a message. The crawler
    pauses 1 to 3 seconds after each user, and after every ``max_lines``
    users the collected rows go to ``save_to_csv_a``.
    """
    client = ZhihuClient()

    # NOTE(review): credentials are hard-coded in the source.
    try:
        client.login('*****@*****.**', 'durant')
    except NeedCaptchaException:
        print("Login Error")
        with open('a.gif', 'wb') as captcha_file:
            captcha_file.write(client.get_captcha())
        captcha = input('please input captcha:')
        client.login('*****@*****.**', 'durant', captcha)

    max_lines = 1
    users_done = 0
    pending_rows = []

    with open(USER_TRY_CSV_PATH) as users:
        for entry in users.readlines():
            craw_id = entry.strip("\n")
            crawler = MyCrawler(craw_id, client)
            print(craw_id)

            rows = crawler.crawling_answer(craw_id)
            if len(rows) % 60 == 0:
                row_count = len(rows) // 60
                pending_rows.extend(
                    rows[60 * k:60 * (k + 1)] for k in range(row_count))
            else:
                print("Invalid Output")

            # random pause between users
            time.sleep(random.randint(1, 3))

            users_done += 1
            if users_done == max_lines:
                save_to_csv_a(pending_rows)
                pending_rows = []
                users_done = 0
Пример #27
0
import sys  # used to change the default encoding
import os  # used to extend the module search path
import json

# Make the bundled libraries under ./src/lib importable (Python 2 code).
base_path = unicode(os.path.abspath('.').decode(sys.stdout.encoding))
sys.path.append(base_path + u'/src/lib')
sys.path.append(base_path + u'/src/lib/oauth')

reload(sys)
sys.setdefaultencoding('utf-8') # force utf-8 as the default encoding

from zhihu_oauth  import  ZhihuClient

from zhihu_oauth.exception import NeedCaptchaException

client = ZhihuClient()

# account used for the login attempt and the file the token is read from
test_email = '*****@*****.**'
test_password = '******'
token_file = './token.pkl'

if os.path.lexists(token_file):
    client.load_token(token_file)
    print 'load token success'
else:
    try:
        login_result = client.login(test_email, test_password)
    except NeedCaptchaException:
        # 保存验证码并提示输入,重新登录
        print u'登录失败,需要输入验证码'
        with open('a.gif', 'wb') as f:
Пример #28
0
from zhihu_oauth import ZhihuClient

# Load the saved token and look up one user.
client = ZhihuClient()
client.load_token('token.pkl')
# TODO: replace the fixed id with user input
user = client.people('SakuraNekoq')

# Print the user's business name and the name of the first location.
print('business', user.business.name)
print('locations', user.locations[0].name)
Пример #29
0
# -*- coding: utf-8 -*-
import sys
import json
# Force utf-8 as the interpreter default encoding (Python 2 only).
default_encoding = 'utf-8'
if sys.getdefaultencoding() != default_encoding:
    reload(sys)
    sys.setdefaultencoding(default_encoding)

from zhihu_oauth import ZhihuClient
from neo4j import Database

# Module-level client (token loaded from 'token.pkl') and database handle.
client = ZhihuClient()
client.load_token('token.pkl')
database = Database()

# me = client.me()


def user_bestanswers():

    j = 0
    while True:
        answerIDs = database.graph.data(
            "match(u:User)-[:AUTHOR]->(a:Answer) where a.answer_topic_corresponded is null return a.answerId as answerId  skip 800 limit 100"
        )
        for answerID in answerIDs:
            try:
                # flag = is_coresspoded(answerID)
                # if flag is 1:
                #     print("抓过了"+str(answerID))
                #     continue
Пример #30
0
# coding=utf-8

from __future__ import unicode_literals, print_function

import os

from zhihu_oauth import ZhihuClient


TOKEN_FILE = 'ZHIHUTOKEN.pkl'


# Module-level client: reuse the saved token, or log in interactively and
# save a new one when the token file does not exist yet.
client = ZhihuClient()

if not os.path.isfile(TOKEN_FILE):
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)
Пример #31
0
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # timestamp taken at script start

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ======================== Me ========================
me = client.me()
# print('活动', me.activities)
# print('答案数', me.answer_count)
# print('答案', me.answers)
# print('文章', me.articles)
# print('文章数', me.articles_count)
# print('头像地址', me.avatar_url)
# print('用户所在行业', me.business)
# print('收藏数', me.collected_count)
# print('收藏夹数', me.collection_count)
# print('收藏夹', me.collections)
# print('专栏数', me.column_count)
# print('专栏', me.columns)
# print('专栏数', me.columns_count)
# created_at = time.localtime(me.created_at)
# print('创建时间', time.strftime("%Y-%m-%d %H:%M:%S", created_at))
# print('个人描述', me.description)
# print('草稿数', me.draft_count)
# print('教育信息', me.educations)
Пример #32
0
class Crawler:
    """Collect answers of monitored Zhihu topics, their details and images.

    The attributes below are defined on the class, so they are shared by
    all instances unless an instance rebinds them.
    """
    client_ = ZhihuClient()
    topic_question_df = pd.DataFrame()
    topics_list_ = [19610354, 20010203]  # ids of the monitored topics
    # Meant for per-question details (title, content, follower and answer
    # counts, answers, comments, topics); no method below fills it.
    questions_detail_ = {}
    answers_persisted = set()
    answers_to_download = []

    def __init__(self, topics_list=None):
        """Log in and connect to the local MongoDB database DB_NAME.

        :param topics_list: optional topic ids that replace the class-level
            default for this instance; ``None`` or an empty list keeps the
            default
        """
        if topics_list is not None and len(topics_list) > 0:
            # Bind the attribute that get_topics_detail() actually reads.
            # The old code assigned to the misspelled ``topic_lists_`` only,
            # so the caller's topics were never used.
            self.topics_list_ = list(topics_list)
            # Kept as an alias for code that read the misspelled name.
            self.topic_lists_ = self.topics_list_

        if os.path.isfile(TOKEN_FILE):
            self.client_.load_token(TOKEN_FILE)
        else:
            self.client_.login_in_terminal()
            self.client_.save_token(TOKEN_FILE)

        self.client_.save_token('token.pkl')
        self.db_ = MongoClient(host="localhost", port=27017)[DB_NAME]

    def get_topics_detail(self):
        '''
        Collect answer ids from the unanswered questions of each topic.

        For every monitored topic, the first question with more than 10
        answers contributes all of its answer ids to
        ``answers_to_download``; the scan of that topic then stops. The
        accumulated ids are written to "answers_to_download.npy".

        :return:
        '''
        for topic_id in self.topics_list_:
            count = 0
            topic = self.client_.topic(topic_id)
            for q in topic.unanswered_questions:
                if count > 0:
                    break
                answers = [ans.id for ans in q.answers]
                if len(answers) > 10:  # only keep questions with more than 10 answers
                    self.answers_to_download.extend(answers)
                    count += 1
                    print(len(self.answers_to_download))
                # TODO: add a proper rest strategy here
                # pause 1-3 seconds when the random draw is a multiple of 10
                if random.randint(0, 100) % 10 == 0:
                    time.sleep(random.randint(1, 3))
                    print("sleep done")

        arr = np.array(self.answers_to_download)
        np.save("answers_to_download.npy", arr)
        # TODO: find out how to update this list

    def get_questions_detail(self):
        """Fetch every answer listed in TOPICS_COLLECTION and store its details.

        For each answer id found in the ``aids`` column, the cleaned answer
        content, the cleaned comments (each followed by "@"), the vote count
        and the author's follower count are inserted into
        ANSWERS_COLLECTION. Answers are inserted without checking whether
        they were stored before.
        """
        all_answers_id = pd.DataFrame(list(self.db_[TOPICS_COLLECTION].find()))
        for answers in all_answers_id['aids']:  # answer ids of one question
            for ans_id in answers:

                ans = self.client_.answer(ans_id)
                ans_content = Cleaner.filter_tags(ans.content)
                comment_content = ""
                if ans.comment_count > 0:
                    # "@" terminates each comment so they can be split later
                    comment_content = "".join(
                        Cleaner.filter_tags(comment.content) + "@"
                        for comment in ans.comments)
                time.sleep(random.randint(1, 4))
                print("ans {} done retrival and cleaning".format(ans.id))

                ans_detail = {
                    'aid': ans_id,
                    'votes': ans.voteup_count,
                    'content': ans_content,
                    'comments': comment_content,
                    'author_follower_num': ans.author.follower_count
                }

                self.db_[ANSWERS_COLLECTION].insert_one(ans_detail)

        # TODO: data cleaning

        # TODO: decide in what format to save these data

    def parse_img_src(self, html):
        """Return the ``src`` value of every self-closed ``<img .../>`` tag.

        :param html: markup to scan
        :return: list of url strings, in document order; tags without a
            ``src`` attribute are skipped
        """
        # ``(?:...)`` is not needed here: the tag name is matched literally
        # and case-insensitively. The old ``[img|IMG]`` was a character
        # class, so it matched any tag starting with one of those letters.
        img_tag_pattern = r'<img\b.*?/>'
        img_url_pattern = r'.+?src="(\S+)"'
        img_url_list = []
        for tag in re.findall(img_tag_pattern, html, re.IGNORECASE):
            sources = re.findall(img_url_pattern, tag)
            if sources:  # a tag without src no longer raises IndexError
                img_url_list.append(sources[0])
        return img_url_list

    def save_img(self, img_url, file_name, file_path='./images/'):
        """Download ``img_url`` and write it to ``file_path``/``file_name``.

        The directory is created when missing. Any failure is reported on
        stdout and otherwise ignored, so one bad image does not stop a run.
        """
        try:
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            filename = os.path.join(file_path, file_name)
            img = requests.get(img_url)
            with open(filename, "wb") as f:
                f.write(img.content)
        except Exception as e:  # deliberate best effort, see docstring
            print("error saving {}: {}".format(img_url, e))

    def download_images(self, path):
        '''
        Download the images of every answer in ``answers_to_download``.

        Files go to "./images", named by the last 10 characters of the url.

        :param path: accepted but not used; the target directory is fixed
        :return:
        '''
        for ans_id in self.answers_to_download:
            answer = self.client_.answer(ans_id)
            img_src_list = self.parse_img_src(answer.content)
            print("img src list : {}".format(img_src_list))
            for img_url in img_src_list:
                self.save_img(img_url, img_url[-10:], "./images")
                print("picture {} download complete".format(img_url))
            print("answer {} download complete".format(ans_id))
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # timestamp taken at script start

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ============ Column module ============
# Print the attributes of one column, one per line.
cid = 10000
column = client.column(cid)
print('文章数', column.article_count)
print('文章', column.articles)
print('文章数', column.articles_count)
print('作者', column.author)
print('能否评论', column.comment_permission)
print('描述', column.description)
print('关注人数', column.follower_count)
print('关注人', column.followers)
print('问题ID', column.id)
print('图像地址', column.image_url)
print('标题', column.title)
print('是否更新过', column.updated)
# updated_time is formatted as local time
print('更新时间', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(column.updated_time)))

# ================ Elapsed time ================
run_time = time.time() - start_time
if run_time < 60:  # 两位小数的秒
Пример #34
0
# coding=utf-8

from __future__ import unicode_literals, print_function

import os

from zhihu_oauth import ZhihuClient


TOKEN_FILE = 'token.pkl'


# Module-level client: reuse the saved token, or log in with the account
# below and save a new token when the token file does not exist yet.
client = ZhihuClient()

if not os.path.isfile(TOKEN_FILE):
    # NOTE(review): credentials are hard-coded in the source.
    client.login('*****@*****.**', 'Zhihu2Ebook')
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)

Пример #35
0
path_prefix = '/Users/alicewish/我的坚果云/'  # directory prefix of the files
txt_file_path = path_prefix + file_name + '.txt'  # path of the TXT file
# ================ Read the text line by line ================
text_readline = []  # lines of the file, with newline characters removed
with open(txt_file_path) as fin:
    for line in fin:
        text_readline.append((line).replace('\n', ''))
print(text_readline)

for i in range(len(text_readline)):
    print(text_readline[i])
# ================ Read the account and the password ================
# The first line is used as the account, the second as the password.
account = text_readline[0]
passward = text_readline[1]

client = ZhihuClient()

try:
    client.login(account, passward)
except NeedCaptchaException:
    # save the captcha image, prompt for it, and log in again
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(account, passward, captcha)

# may only be used once the client is logged in
client.save_token('/Users/alicewish/我的坚果云/token.pkl')

# ================ Elapsed time ================
run_time = time.time() - start_time
Пример #36
0
from __future__ import unicode_literals, print_function

import os
import re
import time

from zhihu_oauth import ZhihuClient
from zhihu_oauth import Topic
from zhihu_oauth import exception


TOKEN_FILE = 'token.cache'
TOP_SIZE = 50

# Log in: reuse the saved token, or log in interactively and save a new one.
client = ZhihuClient()

if os.path.isfile(TOKEN_FILE):
    client.load_token(TOKEN_FILE)
else:
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)

# The topmost root topic
root_topic = client.topic(19776749)
# The list that stores the hottest topics
hot_topics = list()
# Whether hot_topics is full
hot_topics_full = False
# The least hot topic in hot_topics
last_topic = {}
    return os.listdir()


def get_answers():
    """Return the entry names of the directory named by ``collection_id``."""
    entries = os.listdir(collection_id)
    return entries


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--collection", "-c", help="Set collection ID, e.g. 99549491")
    parser.add_argument("--thread", "-t", help="(Optional) Set the threads, 3 to 7 is recommended, default value is 5.")
    parser.add_argument("--upload", "-u", action='store_true', help="Upload the answers to Zhihu.")
    parser.add_argument("--download", "-d", action='store_true', help="Download the answers from Zhihu.")

    global client
    client = ZhihuClient()
    client.login_in_terminal()
    me = client.me()

    args = parser.parse_args()

    if args.collection:
        print("Selected collection: ", str(args.collection))
        collection = client.collection(int(args.collection))
        global collection_id
        collection_id = str(collection.id)
    else:
        print("YOU MUST TELL ME WHICH COLLECTION YOU WANT TO ADD IN!!")
        exit()

    if args.thread:
Пример #38
0
def login(username, password):
    """Return a new ZhihuClient after a terminal login with the given account."""
    zhihu = ZhihuClient()
    zhihu.login_in_terminal(username, password)
    return zhihu
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # timestamp taken at script start

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ======================== Query a question ========================
# Print the attributes of one question, one per line.
qid = 48217184
question = client.question(qid)
print('允许删除', question.allow_delete)
print('答案数', question.answer_count)
print('答案', question.answers)
print('评论数', question.comment_count)
print('评论', question.comments)
print('细节', question.detail)
print('摘录', question.excerpt)
print('关注数', question.follower_count)
print('关注人', question.followers)
print('问题ID', question.id)
print('重定向', question.redirection)
print('状态', question.status)
print('建议修改', question.suggest_edit)
print('标题', question.title)
print('话题', question.topics)
# updated_time is formatted as local time
print('更新时间', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(question.updated_time)))
Пример #40
0
# Create in 2019-05-29
# Project: Store data from Zhihu in Mysql Database
# Author: SHIELD_QIQI
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException
from Database import MysqlQI
import jieba
import jieba.analyse
import re
from collections import Counter
import wordcloud

client = ZhihuClient()
# Log in directly with the token file saved by the first login.
client.load_token('token.pkl')

# Database helper used to add and drop the tables that will hold the data.
mysql = MysqlQI()

# # 删除数据库(urldata)中的所有的表
# tableNum = mysql.cur.execute("SELECT concat('DROP TABLE IF EXISTS ', table_name, ';') ""FROM information_schema.tables WHERE table_schema = 'urldata';")
# print("-------------------------------------\n数据库中原有"+str(tableNum)+"个表")
# print("正在删除.....")
# for i in range(0,tableNum):
#     mysql.cur.execute("SELECT concat('DROP TABLE IF EXISTS ', table_name, ';') ""FROM information_schema.tables WHERE table_schema = 'urldata';")
#     mysql.cur.execute(mysql.cur.fetchone()[0])
#
# tableNum = mysql.cur.execute("SELECT concat('DROP TABLE IF EXISTS ', table_name, ';') ""FROM information_schema.tables WHERE table_schema = 'urldata';")
# if tableNum == 0:
#     print("该数据库下所有表删除完毕\n---------------------------------------")
# else:
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # timestamp taken at script start

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ============ Article module ============
# Print the attributes of one article, one per line.
aid = 10000
article = client.article(aid)
print('作者', article.author)
print('能否评论', article.can_comment)
print('从属专栏', article.column)
print('评论数', article.comment_count)
print('评论权限', article.comment_permission)
print('评论', article.comments)
print('内容', article.content)
print('摘录', article.excerpt)
print('文章ID', article.id)
print('图像地址', article.image_url)
print('建议修改', article.suggest_edit)
print('标题', article.title)
# updated_time is formatted as local time
print('更新时间', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(article.updated_time)))
print('赞同数', article.voteup_count)

# ================ Elapsed time ================
run_time = time.time() - start_time
Пример #42
0
__author__ = 'ipreacher'

import time
import numpy as np
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

client = ZhihuClient()
client.login_in_terminal()
me = client.me()

# Greeting text sent verbatim to every follower.
t = '你好,感谢关注!\n新的一年里,祝学业进步,工作顺利!\n[This is sent by a robot.]\nipreacher'

for follower in me.followers:
    print(follower.name)
    me.message(follower, t)
    # Random pause between messages: ten times the magnitude of one
    # standard-normal sample, in seconds.
    pause = 10 * abs(np.random.randn())
    time.sleep(pause)
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # reference timestamp taken at start-up

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ======================== Answer lookup ========================
aid = 34404209
answer = client.answer(aid)

# (label, attribute name) pairs; each attribute is read and printed in turn,
# one line per pair, in exactly this order.
answer_fields = (
    ('作者', 'author'),
    ('能否评论', 'can_comment'),
    ('收藏夹', 'collections'),
    ('评论数', 'comment_count'),
    ('评论权限', 'comment_permission'),
    ('评论', 'comments'),
    ('内容', 'content'),
    ('创建时间', 'created_time'),
    ('摘录', 'excerpt'),
    ('答案ID', 'id'),
    ('能否复制', 'is_copyable'),
    ('是我回答的吗', 'is_mine'),
    ('从属问题', 'question'),
    ('建议修改', 'suggest_edit'),
    ('感谢数', 'thanks_count'),
)
for label, attr in answer_fields:
    print(label, getattr(answer, attr))

# Only the update time is formatted; the creation time above is printed raw.
updated_at = time.localtime(answer.updated_time)
print('更新时间', time.strftime("%Y-%m-%d %H:%M:%S", updated_at))
print('投票者', answer.voters)
Пример #44
0
# coding=utf-8
# 通过昵称搜索用户token_url
# 具体数据见Excel文件 zhihu_users_token_url.xlsx
from __future__ import unicode_literals, print_function

import os

from zhihu_oauth import ZhihuClient

TOKEN_FILE = 'token.pkl'
client = ZhihuClient()

# Without a saved token file: log in through the terminal and save the token.
# With one: just load it.
if not os.path.isfile(TOKEN_FILE):
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)

# Ids of the most-followed search hit for each nickname read from the file.
user = []

# Read all nicknames up front. The context manager closes the handle even if
# reading fails, and the handle no longer shadows the Python 2 builtin `file`.
with open("C:\\Users\\80693\\Desktop\\User_name.txt") as name_file:
    lines = name_file.readlines()

for line in lines:
    line = line.strip()
    if not line:
        # A blank line carries no nickname to search for.
        continue
    # NOTE(review): the tab-split fields are passed to search() as a list,
    # exactly as before — confirm whether only the first column is the nickname.
    line = line.split('\t')
    peoples = client.search(line, search_type='PEOPLE')
    # NOTE(review): people_five is never read afterwards and the ranking below
    # still runs over `peoples`; kept because removing it could change how the
    # result iterator is consumed — confirm whether ranking was meant to be
    # limited to these five hits.
    people_five = [p for _, p in zip(range(5), peoples)]
    # Rank the hits by follower count, most followed first.
    pp = sorted(peoples, key=lambda x: x.obj.follower_count, reverse=True)
    if not pp:
        # No hit at all: report it and move on instead of failing on pp[0].
        print('no search result for', line)
        continue
    p = pp[0].obj
    print(p.name.ljust(14), p.id.ljust(40), p.follower_count)
    user.append(p.id)
Пример #45
0
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

import logging

from logging import StreamHandler
from flask import Flask, jsonify, redirect
from flask_cache import Cache
from zhihu_oauth import ZhihuClient

# Zhihu client authenticated from the saved token file.
client = ZhihuClient()
client.load_token('token.pkl')
me = client.me()

# Flask application plus a cache configured with the 'simple' cache type.
app = Flask(__name__)
cache = Cache(config={'CACHE_TYPE': 'simple'})
cache.init_app(app)

# Application logger at DEBUG level with a stream handler attached.
app.logger.setLevel(logging.DEBUG)
file_handler = StreamHandler()
app.logger.addHandler(file_handler)


@app.route('/', methods=['GET'])
def index_route():
    return jsonify({
        'author': 'knarfeh',
        'author_url': 'http://www.knarfeh.com',
        'people': 'http://zhihu-api.knarfeh.com/people/<people_id>',
Пример #46
0
from __future__ import unicode_literals, print_function
from RedisQueue import RedisQueue
from zhihu_oauth import ZhihuClient
import datetime
import time
import random
import sys
from timeout import timeout
import os
from utils import print_err
from pymongo import MongoClient

MAX_SLEEP_TIME = 15
# Token file for this worker; the first command-line argument selects it.
Cookies_File = './cookies/cookies%s.json' % sys.argv[1]
# `client` is a plain module-level name; a `global` statement at module scope
# has no effect, so none is used here.
client = ZhihuClient()
if os.path.isfile(Cookies_File):
    client.load_token(Cookies_File)
else:
    # Read the credential list inside a context manager so the file handle is
    # closed instead of being left open.
    with open('./cookies/client_info_list.data') as info_file:
        client_info = info_file.readlines()
    # The same command-line argument picks the line; its first two
    # tab-separated fields are handed to login_in_terminal.
    client_info = client_info[int(sys.argv[1])].strip().split('\t')
    client.login_in_terminal(client_info[0], client_info[1])
    client.save_token(Cookies_File)


def get_user_questions(uname):
    """Print ``uname`` unless it is the empty string; always returns None."""
    global client
    if uname != '':
        print(uname)
Пример #47
0
from pylon import create_logger
# Two objects built by pylon's create_logger: one from this file's path and one
# from the same path with an '.error' suffix (presumably a separate error log —
# confirm against pylon).
log = create_logger(__file__)
log_error = create_logger(__file__ + '.error')


class ZhihuParseError(Exception):
  """Exception carrying an optional message and an optional associated value.

  Both are stored as plain attributes: ``msg`` and ``value`` (default None).
  """

  def __init__(self, msg=None, value=None):
    self.msg, self.value = msg, value




# Module-wide Zhihu client, authenticated from the saved token file at import time.
TOKEN_FILE = 'token.pkl'
client = ZhihuClient()
client.load_token(TOKEN_FILE)






def zhihu_answer_url(answer):
  """Return the zhihu.com URL of an answer.

  ``answer`` is either an integer answer id, which is first resolved through
  the module-level ``client``, or an object exposing ``id`` and
  ``question.id``.

  Original author's note: apparently no error is raised even when the answer
  has been deleted.
  """
  resolved = client.answer(answer) if isinstance(answer, int) else answer
  url_template = 'https://www.zhihu.com/question/{}/answer/{}'
  return url_template.format(resolved.question.id, resolved.id)

Пример #48
0
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # initial timestamp

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ======================== Me ========================
me = client.me()
# print('活动', me.activities)
# print('答案数', me.answer_count)
# print('答案', me.answers)
# print('文章', me.articles)
# print('文章数', me.articles_count)
# print('头像地址', me.avatar_url)
# print('用户所在行业', me.business)
# print('收藏数', me.collected_count)
# print('收藏夹数', me.collection_count)
# print('收藏夹', me.collections)
# print('专栏数', me.column_count)
# print('专栏', me.columns)
# print('专栏数', me.columns_count)
# created_at = time.localtime(me.created_at)
# print('创建时间', time.strftime("%Y-%m-%d %H:%M:%S", created_at))
# print('个人描述', me.description)
# print('草稿数', me.draft_count)
# print('教育信息', me.educations)
Пример #49
0
    for _,topic in zip(range(30),list) :
        name11=topic.name
        best=topic.best_answers
        for _,answer in zip(range(30),best):
            people=answer.author
            if people.name!=namen.decode("utf-8"):
                people1 = []
                for topic in people.badge.topics:
                    if topic.name.find(toname.decode("utf-8"))!=-1:
                        people1.append(topic.name)
                dicname[people.name]=people1
    return dicname

# Script entry point (Python 2: note the print statement below).
if __name__ == '__main__':
    # clean() is defined elsewhere in the file; it receives the answer1 directory path.
    clean('D:/' + u'py程序' + '/answer1')
    client = ZhihuClient()

    a=get("你好")
    # presumably logs the client in ("denglu" is pinyin for "log in") — confirm
    a.denglu(client)
    me = client.me()
    print me.id
    # a.findquestion(client)
    #
    # list_q=a.Analyse_question(a.dic_name,a.qqcount)
    #
    # a.Analyse_answer(list_q)
    # #Anasly_location(dicall1)
    # #
    # # # Get the answerer information for the word cloud
    # # path1 = 'D:/' + u'py程序' + '/answer/people_qb.txt'
    # # hello.ciyun1(path1)
Пример #50
0
# coding=utf-8
# https://pypi.org/project/zhihu-oauth

from __future__ import unicode_literals, print_function

import os

from zhihu_oauth import ZhihuClient
from zhihu_oauth import SearchType

TOKEN_FILE = 'token.pkl'

client = ZhihuClient()

# Load the saved token when the file exists; otherwise log in through the
# terminal and save the token for the next run.
if os.path.isfile(TOKEN_FILE):
    client.load_token(TOKEN_FILE)
else:
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)

me = client.me()

print('name', me.name)
print('headline', me.headline)
print('description', me.description)

print('following topic count', me.following_topic_count)
# The number of followed people is `following_count`; `following_topic_count`
# is the topic figure already printed on the line above.
print('following people count', me.following_count)
print('followers count', me.follower_count)

print('voteup count', me.voteup_count)
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # reference timestamp taken at start-up

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ============ User module ============
pid = "edna-krabappel"

people = client.people(pid)

# (label, attribute name) pairs; each attribute is read and printed in turn,
# one line per pair, in exactly this order.
people_fields = (
    ('活动', 'activities'),
    ('答案数', 'answer_count'),
    ('答案', 'answers'),
    ('文章', 'articles'),
    ('文章数', 'articles_count'),
    ('头像地址', 'avatar_url'),
    ('用户所在行业', 'business'),
    ('收藏数', 'collected_count'),
    ('收藏夹数', 'collection_count'),
    ('收藏夹', 'collections'),
    ('专栏数', 'column_count'),
    ('专栏', 'columns'),
    ('专栏数', 'columns_count'),
)
for label, attr in people_fields:
    print(label, getattr(people, attr))

# The creation time is formatted before printing.
created_at = time.localtime(people.created_at)
print('创建时间', time.strftime("%Y-%m-%d %H:%M:%S", created_at))
print('个人描述', people.description)
print('草稿数', people.draft_count)