Пример #1
0
def _detect_story_images(ctx, story):
    """Decide whether a story's images still need detection before rewriting.

    Parses the image urls out of ``story.content`` and looks up their known
    statuses via ``ImageInfo.batch_detect_images``. Urls whose status is
    ``None`` are grouped by url root; at most 3 urls per root are picked at
    random and passed to ``ctx.hope('worker_rss.detect_story_images', ...)``.
    When no url is missing a status, ``_replace_story_images`` is called
    directly. Returns ``None`` in every case.
    """
    processor = StoryImageProcessor(story.link, story.content)
    image_urls = _image_urls_of_indexs(processor.parse())
    if not image_urls:
        return

    known_statuses = ImageInfo.batch_detect_images(image_urls)
    # Urls with no recorded status are the ones that still need detecting.
    pending_urls = [u for u in image_urls if known_statuses.get(u) is None]
    pending_by_root = defaultdict(list)
    for pending_url in pending_urls:
        pending_by_root[ImageInfo.extract_url_root(pending_url)].append(pending_url)

    num_pending = len(pending_urls)
    LOG.info(
        f'story#{story.feed_id},{story.offset} {story.link} has {len(image_urls)} images, '
        f'need detect {num_pending} images '
        f'from {len(pending_by_root)} url_roots'
    )

    if not pending_by_root:
        # Every image already has a status: rewrite the story right away.
        _replace_story_images(feed_id=story.feed_id, offset=story.offset)
        return

    # Cap the work per url root: sample 3 urls when a root has more than 3.
    sampled_urls = []
    for group in pending_by_root.values():
        sampled_urls.extend(random.sample(group, 3) if len(group) > 3 else group)

    payload = dict(
        feed_id=story.feed_id,
        offset=story.offset,
        story_url=story.link,
        image_urls=list(set(sampled_urls)),
    )
    ctx.hope('worker_rss.detect_story_images', payload)
Пример #2
0
def _replace_story_images(feed_id, offset):
    """Rewrite referer-denied image urls in the story at (feed_id, offset).

    Loads the story through ``STORY_SERVICE.get_by_offset``, parses its image
    urls, and maps every url whose detected status is in
    ``IMAGE_REFERER_DENY_STATUS`` to an ``/api/v1/image/...`` url. The
    processed content is then stored with ``STORY_SERVICE.update_story``.
    Returns ``None``; nothing is updated when the story has no image urls.
    """
    story = STORY_SERVICE.get_by_offset(feed_id, offset, detail=True)
    processor = StoryImageProcessor(story.link, story.content)
    indexes = processor.parse()
    image_urls = _image_urls_of_indexs(indexes)
    if not image_urls:
        return

    statuses = ImageInfo.batch_detect_images(image_urls)
    replacements = {
        url: '/api/v1/image/{}?{}'.format(
            encode_image_url(url, story.link), RSSANT_IMAGE_TAG)
        for url, status in statuses.items()
        if status in IMAGE_REFERER_DENY_STATUS
    }
    LOG.info(f'story#{feed_id},{offset} {story.link} '
             f'replace {len(replacements)} referer deny images')

    # process() is invoked even with an empty replacement map because,
    # per the original author's note, it also fixes relative urls.
    new_content = processor.process(indexes, replacements)
    STORY_SERVICE.update_story(feed_id, offset, {'content': new_content})
Пример #3
0
def _replace_story_images(story_id):
    """Rewrite referer-denied image urls in the story with primary key story_id.

    Fetches the story with ``Story.objects.get``, parses its image urls and
    replaces each url whose detected status is in
    ``IMAGE_REFERER_DENY_STATUS`` with an ``/api/v1/image/...`` url, then
    saves the processed content back onto the story. Returns ``None``; the
    story is not saved when it has no image urls.
    """
    story = Story.objects.get(pk=story_id)
    link = story.link
    processor = StoryImageProcessor(link, story.content)
    parsed_indexes = processor.parse()
    urls = _image_urls_of_indexs(parsed_indexes)
    if not urls:
        return

    def _proxied(image_url):
        # Build the image url that replaces a referer-denied original.
        encoded = encode_image_url(image_url, link)
        return '/api/v1/image/{}?{}'.format(encoded, RSSANT_IMAGE_TAG)

    url_map = {}
    for image_url, status in ImageInfo.batch_detect_images(urls).items():
        if status not in IMAGE_REFERER_DENY_STATUS:
            continue
        url_map[image_url] = _proxied(image_url)

    LOG.info(f'story#{story_id} {story.link} '
             f'replace {len(url_map)} referer deny images')

    # Always run process(), even when url_map is empty: per the original
    # author's note it also fixes relative urls.
    story.content = processor.process(parsed_indexes, url_map)
    story.save()