def parse_data(cls, data):
    """Extract a ``Meta`` record and the body text from one list-item node.

    ``data`` is a parsed markup node exposing ``find(tag, _class=...)`` and
    ``get_attrs(name)``.  Items whose ``data-type`` attribute equals
    ``'Answer'`` take their header from ``div.answer-head``; every other item
    takes it from ``div.post-head``.  ``cls`` is not used by this function.

    Returns:
        A ``(meta, content)`` tuple: the populated ``Meta`` and the string of
        the item's ``textarea.content`` node after the ``re.sub`` pass at the
        end of the function.

    Raises:
        AttributeError: when the title node, or both candidate author nodes,
            cannot be read.  The offending node is printed before the
            exception is re-raised.
    """
    meta = Meta(pattern=Meta.simple)

    title = data.find('h2', _class='zm-item-title')
    try:
        meta.title = title.string
    except AttributeError:
        # Dump the node that has no usable title, then re-raise the active
        # exception (same style as the author handler below) so its message
        # and traceback are kept instead of being replaced by a bare one.
        print(data)
        raise

    original_url = title.find('a').get_attrs('href')
    if data.get_attrs('data-type') == 'Answer':
        head = data.find('div', _class='answer-head')
        # The host is a prefix of the link, exactly as author_homepage is
        # assembled below; appending it after the href yields "<path><host>".
        original_url = config.get_setting('API/host') + original_url
    else:
        head = data.find('div', _class='post-head')
    meta.original_url = original_url

    try:
        # Preferred form: the author is rendered as a profile link.
        author = head.find('a', _class='author-link')
        meta.author = author.string
        meta.author_homepage = (
            config.get_setting('API/host') + author.get_attrs('href'))
    except AttributeError:
        try:
            # Fallback form: a plain name span with no profile link, so the
            # homepage is set to the bare host.
            author = head.find('span', _class='name')
            meta.author = author.string
            meta.author_homepage = config.get_setting('API/host')
        except AttributeError:
            print(head)
            raise

    meta.voteup = int(
        head.find('div', _class='zm-item-vote-info').get_attrs('data-votecount'))

    # Reference notes (not used by this function): item-id markup and the
    # vote-info endpoints.
    # <meta itemprop="post-id" content="107121832">
    # <meta itemprop="answer-id" content="107121832">
    # https://www.zhihu.com/node/AnswerVoteInfoV2?params={"answer_id":"203923119"}
    # https://www.zhihu.com/node/ColumnPostVoteInfoV2?params={"post_id":"103306156"}

    def stg(r):
        # Replacement callback for re.sub: look the matched text up in the
        # table, falling back to an empty string.
        # NOTE(review): as written every key maps to itself, so the
        # substitution leaves the text unchanged; this looks like it was
        # meant to decode escaped quote/angle-bracket entities - confirm.
        return {
            '"': '"',
            '<': '<',
            '>': '>'
        }.get(r.group(0), '')

    return meta, re.sub('(")|(<)|(>)', stg,
                        data.find('textarea', _class='content').string)
def parse_data(cls, data):
    """Translate one answer payload into a ``Meta`` record plus its body.

    ``data`` is indexed like a mapping.  The keys read with ``[]`` are
    ``question`` (``title``, ``id``), ``author`` (``name``, ``url_token``,
    ``avatar_url_template``), ``voteup_count``, ``id`` and ``created_time``;
    a missing one propagates the mapping's lookup error.  ``content`` is
    read with ``.get`` and may therefore be absent.  ``cls`` is not used.

    Returns:
        A ``(meta, content)`` tuple, where ``content`` is
        ``data.get('content')``.
    """
    record = Meta()

    question = data['question']
    record.title = question['title']

    author = data['author']
    record.author = author['name']

    record.voteup = data['voteup_count']

    # Link to the answer itself, built from the question id and answer id.
    record.original_url = API.format_url(
        'answer_link',
        question_id=question['id'],
        answer_id=data['id'],
    )

    created_at = data['created_time']
    record.created_date = timer.timestamp_to_date(created_at)

    record.author_homepage = API.format_url(
        'author_homepage',
        user_id=author['url_token'],
    )

    # The avatar URL is a template; it is filled in with size 'l'.
    avatar_template = author['avatar_url_template']
    record.author_avatar_url = avatar_template.format(size='l')

    body = data.get('content')
    return record, body
def parse_answer(data):
    """Populate a ``Meta`` record from one answer payload and return it.

    Top-level fields (``id``, ``content``, ``voteup_count``,
    ``created_time``) are read with ``data.get``.  Nested fields are read
    through ``util.getvalue`` with slash-separated paths such as
    ``'author/name'`` (presumably a path lookup inside ``data`` - confirm
    against ``util``).

    Returns:
        The populated ``Meta`` instance.
    """
    answer = Meta()

    # Fields available directly on the payload.
    answer_id = data.get('id')
    answer.identity = answer_id
    answer.content = data.get('content')
    answer.voteup = data.get('voteup_count')

    # Fields addressed by path.
    answer.author = util.getvalue(data, 'author/name')
    answer.title = util.getvalue(data, 'question/title')

    created_at = data.get('created_time')
    answer.created_date = util.timestamp_to_date(created_at)

    # URLs derived from identifiers.
    author_token = util.getvalue(data, 'author/url_token')
    answer.author_homepage = core.ZhihuRequestsApi.get_with_identity(
        'AuthorHomePageUrl', author_token)
    answer.source_url = core.ZhihuRequestsApi.get_with_identity(
        'AnswerUrl', answer_id)

    # The avatar URL is a template; it is filled in with size 'l'.
    avatar_template = util.getvalue(data, 'author/avatar_url_template')
    answer.author_avatar_url = avatar_template.format(size='l')

    return answer