示例#1
0
def do_update_story_images(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    story_url: T.url,
    images: T.list(T.dict(
        url=T.url,
        status=T.int,
    ))
):
    """Persist detected image statuses for a story, then refresh its images.

    The incoming images are grouped by the url root returned from
    ``ImageInfo.extract_url_root``. For each root only the largest
    ``(status, url)`` tuple is kept, and one ``ImageInfo`` row is
    bulk-created per root inside a single transaction. Afterwards
    ``_replace_story_images`` is called for the story.

    Args:
        ctx: Actor context; not used by this function body.
        feed_id: Feed id, forwarded to ``_replace_story_images``.
        offset: Story offset, forwarded to ``_replace_story_images``.
        story_url: Stored as ``referer`` on every created ``ImageInfo``.
        images: Dicts carrying a ``url`` and a ``status`` entry.
    """
    # Collapse images to a single (status, url) pair per url root.
    best_by_root = {}
    for image in images:
        image_url = image['url']
        root = ImageInfo.extract_url_root(image_url)
        candidate = (image['status'], image_url)
        previous = best_by_root.get(root)
        # Tuples compare by status first, then by url as the tie-breaker.
        best_by_root[root] = (
            candidate if previous is None else max(candidate, previous)
        )
    with transaction.atomic():
        records = [
            ImageInfo(
                url_root=root,
                sample_url=sample_url,
                referer=story_url,
                status_code=status_code,
            )
            for root, (status_code, sample_url) in best_by_root.items()
        ]
        LOG.info(f'bulk create {len(records)} ImageInfo objects')
        ImageInfo.objects.bulk_create(records)
    _replace_story_images(feed_id, offset)
示例#2
0
def _detect_story_images(ctx, story):
    """Schedule detection for a story's unknown images, or replace them now.

    Image urls are parsed from the story content and looked up with
    ``ImageInfo.batch_detect_images``. Urls whose looked-up status is
    ``None`` are grouped by url root. If any such urls exist, up to three
    urls per root (randomly sampled when a root has more) are sent, with
    duplicates removed, to ``worker_rss.detect_story_images`` through
    ``ctx.hope``. Otherwise ``_replace_story_images`` is called directly.
    Nothing happens when the story has no image urls.

    Args:
        ctx: Object providing ``hope(name, message)`` for dispatching work.
        story: Object exposing ``link``, ``content``, ``feed_id`` and
            ``offset``.
    """
    processor = StoryImageProcessor(story.link, story.content)
    image_urls = _image_urls_of_indexs(processor.parse())
    if not image_urls:
        return
    known_statuses = ImageInfo.batch_detect_images(image_urls)
    # Group urls with no known status by their url root.
    pending_by_root = defaultdict(list)
    for image_url in image_urls:
        if known_statuses.get(image_url) is not None:
            continue
        pending_by_root[ImageInfo.extract_url_root(image_url)].append(image_url)
    pending_count = sum(len(group) for group in pending_by_root.values())
    LOG.info(
        f'story#{story.feed_id},{story.offset} {story.link} has {len(image_urls)} images, '
        f'need detect {pending_count} images '
        f'from {len(pending_by_root)} url_roots'
    )
    if not pending_by_root:
        _replace_story_images(feed_id=story.feed_id, offset=story.offset)
        return
    # Cap the detection workload at three urls per url root.
    sampled_urls = []
    for group in pending_by_root.values():
        sampled_urls.extend(random.sample(group, 3) if len(group) > 3 else group)
    ctx.hope('worker_rss.detect_story_images', dict(
        feed_id=story.feed_id,
        offset=story.offset,
        story_url=story.link,
        image_urls=list(set(sampled_urls)),
    ))