Exemplo n.º 1
0
def do_update_story(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    content: T.str,
    summary: T.str,
    has_mathjax: T.bool.optional,
    url: T.url,
):
    """Store fetched content for an existing story, then detect its images.

    The story is looked up by ``(feed_id, offset)``. Nothing is written when
    the story does not exist, or when the fetched ``content`` is neither
    judged fulltext by ``is_fulltext_content`` nor accepted by ``is_summary``
    when compared (as plain text) against the story's current content.

    On acceptance the story's ``link``, ``content``, ``summary`` and
    ``has_mathjax`` fields are updated and ``_detect_story_images`` is invoked.
    Always returns ``None``.
    """
    story = STORY_SERVICE.get_by_offset(feed_id, offset, detail=True)
    if not story:
        LOG.error('story#%s,%s not found', feed_id, offset)
        return

    fetched_is_fulltext = is_fulltext_content(content)
    if not fetched_is_fulltext:
        # Fall back to comparing the plain-text form of both versions.
        feed_text = processor.story_html_to_text(story.content)
        fetched_text = processor.story_html_to_text(content)
        if not is_summary(feed_text, fetched_text):
            LOG.info(
                'fetched story#%s,%s url=%r is not fulltext of feed story content',
                feed_id, offset, url,
            )
            return

    STORY_SERVICE.update_story(feed_id, offset, {
        'link': url,
        'content': content,
        'summary': summary,
        'has_mathjax': has_mathjax,
    })
    # NOTE(review): `story` was loaded before the update above, so it
    # presumably still carries the previous link/content - confirm that this
    # is what image detection is meant to inspect.
    _detect_story_images(ctx, story)
Exemplo n.º 2
0
def _update_story(
    story: CommonStory,
    story_content_info: StoryContentInfo,
    content: str,
    summary: str,
    url: str,
    has_mathjax: bool = None,
    sentence_count: int = None
) -> FulltextAcceptStrategy:
    """Apply fetched content to ``story`` according to the accept strategy.

    ``decide_accept_fulltext`` compares a ``StoryContentInfo`` built from the
    fetched ``content`` with ``story_content_info`` and yields the strategy:

    * ``REJECT``: log and return without touching the story.
    * ``APPEND``: store the story's existing content followed by an ``<hr/>``
      separator and the fetched content (missing parts become ``''``).
    * anything else: store the fetched content as given.

    Returns the strategy that was decided.
    """
    accept = decide_accept_fulltext(
        StoryContentInfo(content), story_content_info)

    if accept == FulltextAcceptStrategy.REJECT:
        LOG.info(
            'fetched story#%s,%s url=%r is not fulltext of feed story content',
            story.feed_id, story.offset, url,
        )
        return accept

    if accept == FulltextAcceptStrategy.APPEND:
        existing_html = story.content or ''
        fetched_html = content or ''
        content = existing_html + '\n<hr/>\n' + fetched_html

    fields = {
        'link': url,
        'content': content,
        'summary': summary,
        'has_mathjax': has_mathjax,
        'sentence_count': sentence_count,
    }
    STORY_SERVICE.update_story(story.feed_id, story.offset, fields)
    return accept
Exemplo n.º 3
0
    def test_update_story(self):
        """Save 20 stories, then update story #10 twice.

        The first update sends content, summary and dt_published; the second
        sends content only. Nothing is asserted after the updates, so this
        only verifies that both calls complete without raising.
        """
        saved = STORY_SERVICE.bulk_save_by_feed(
            self.feed_id, self.storys[:20], batch_size=10)
        self.assertEqual(len(saved), 20)
        self.assert_feed_total_storys(20)
        self.assert_total_story_infos(20)

        source = self.updated_storys[10]
        full_fields = {
            'content': source['content'],
            'summary': source['summary'],
            'dt_published': source['dt_published'],
        }
        STORY_SERVICE.update_story(self.feed_id, 10, full_fields)
        content_only = {'content': full_fields['content']}
        STORY_SERVICE.update_story(self.feed_id, 10, content_only)
Exemplo n.º 4
0
def _replace_story_images(feed_id, offset):
    """Rewrite a story's image URLs and persist the processed content.

    Loads the story at ``(feed_id, offset)``, parses the images in its
    content and checks their status via ``ImageInfo.batch_detect_images``.
    Every image whose status is in ``IMAGE_REFERER_DENY_STATUS`` is mapped to
    an ``/api/v1/image/...`` URL built from ``encode_image_url``. The content
    returned by ``image_processor.process`` is then saved back to the story.

    Returns ``None``. Does nothing when the story is not found (logged as an
    error) or when no image URLs are found in its content.
    """
    story = STORY_SERVICE.get_by_offset(feed_id, offset, detail=True)
    if not story:
        # The lookup can come back empty; without this guard the attribute
        # access below would fail with AttributeError.
        LOG.error('story#%s,%s not found', feed_id, offset)
        return
    image_processor = StoryImageProcessor(story.link, story.content)
    image_indexs = image_processor.parse()
    image_urls = _image_urls_of_indexs(image_indexs)
    if not image_urls:
        return
    image_statuses = ImageInfo.batch_detect_images(image_urls)
    image_replaces = {
        url: '/api/v1/image/{}?{}'.format(
            encode_image_url(url, story.link), RSSANT_IMAGE_TAG)
        for url, status in image_statuses.items()
        if status in IMAGE_REFERER_DENY_STATUS
    }
    LOG.info(
        'story#%s,%s %s replace %s referer deny images',
        feed_id, offset, story.link, len(image_replaces),
    )
    # image_processor.process will (1) fix relative url (2) replace image url,
    # so it is called regardless of whether image_replaces is empty.
    content = image_processor.process(image_indexs, image_replaces)
    STORY_SERVICE.update_story(feed_id, offset, {'content': content})