示例#1
0
    def parse_data(cls, data):
        """Extract metadata and the raw content from one parsed HTML item.

        Args:
            data: A parsed element node. The code relies on it exposing
                ``find(tag, _class=...)`` and ``get_attrs(name)``, and on the
                nodes it returns exposing ``string``.
                NOTE(review): the concrete node type is defined elsewhere.

        Returns:
            A ``(meta, content)`` tuple. ``meta`` is a ``Meta`` built with
            ``pattern=Meta.simple`` and filled with title, original_url,
            author, author_homepage and voteup. ``content`` is the text of
            the ``textarea.content`` node with ``&quot;``, ``&lt;`` and
            ``&gt;`` replaced by ``"``, ``<`` and ``>``.

        Raises:
            AttributeError: If the title node or both author nodes are
                missing (a lookup returned something without the expected
                attribute). The offending node is printed first.
        """
        meta = Meta(pattern=Meta.simple)

        title = data.find('h2', _class='zm-item-title')
        try:
            meta.title = title.string
        except AttributeError:
            # Dump the item for debugging, then re-raise the *original*
            # exception so its message and traceback are preserved.
            print(data)
            raise
        original_url = title.find('a').get_attrs('href')

        if data.get_attrs('data-type') == 'Answer':
            head = data.find('div', _class='answer-head')
            # The host must be prepended to the href (as is done for
            # author_homepage below); appending it produced
            # "<href><host>", which is not a usable URL.
            original_url = config.get_setting('API/host') + original_url
        else:
            head = data.find('div', _class='post-head')

        meta.original_url = original_url

        try:
            author = head.find('a', _class='author-link')
            meta.author = author.string
            meta.author_homepage = config.get_setting(
                'API/host') + author.get_attrs('href')
        except AttributeError:
            # No author link found: fall back to the plain name span and
            # use the bare host as the homepage.
            try:
                author = head.find('span', _class='name')
                meta.author = author.string
                meta.author_homepage = config.get_setting('API/host')
            except AttributeError:
                print(head)
                raise

        meta.voteup = int(
            head.find('div',
                      _class='zm-item-vote-info').get_attrs('data-votecount'))

        # Reference notes kept from the original author (not used by the
        # code below; presumably related to fetching vote info):
        # <meta itemprop="post-id" content="107121832">
        # <meta itemprop="answer-id" content="107121832">
        # https://www.zhihu.com/node/AnswerVoteInfoV2?params={"answer_id":"203923119"}
        # https://www.zhihu.com/node/ColumnPostVoteInfoV2?params={"post_id":"103306156"}

        def stg(r):
            # Map each matched HTML entity back to its literal character.
            return {
                '&quot;': '"',
                '&lt;': '<',
                '&gt;': '>'
            }.get(r.group(0), '')

        return meta, re.sub(r'(&quot;)|(&lt;)|(&gt;)', stg,
                            data.find('textarea', _class='content').string)
示例#2
0
    def parse_data(cls, data):
        """Build a ``Meta`` from an answer mapping and return it with the content.

        Args:
            data: Mapping describing one answer. Keys read here:
                ``question`` (``title``, ``id``), ``author`` (``name``,
                ``url_token``, ``avatar_url_template``), ``voteup_count``,
                ``id``, ``created_time`` and ``content`` (read via ``get``).
                NOTE(review): assumes a dict-like object; confirm with caller.

        Returns:
            A ``(meta, content)`` tuple, where ``content`` is
            ``data.get('content')``.
        """
        record = Meta()

        question = data['question']
        record.title = question['title']

        author = data['author']
        record.author = author['name']
        record.voteup = data['voteup_count']

        # Link to the answer, built from the question id and the answer id.
        record.original_url = API.format_url(
            'answer_link',
            question_id=question['id'],
            answer_id=data['id'],
        )

        created_at = data['created_time']
        record.created_date = timer.timestamp_to_date(created_at)

        record.author_homepage = API.format_url(
            'author_homepage',
            user_id=author['url_token'],
        )

        # The avatar URL is a template with a ``size`` placeholder.
        avatar_template = author['avatar_url_template']
        record.author_avatar_url = avatar_template.format(size='l')

        content = data.get('content')
        return record, content