示例#1
0
文件: store.py 项目: Answeror/aip
        def puts(cls, posts):
            """Insert or update a batch of posts and link each one to its tags.

            Args:
                posts: Iterable of dicts, one per post. Each dict is read for
                    the keys ``'tags'``, ``'md5'``, ``'ctime'`` and
                    ``'post_url'``; every key except ``'tags'`` is forwarded
                    to ``from_dict``.

            Raises:
                Exception: If a post already stored under the same
                    ``post_url`` is attached to an entry whose md5 differs
                    from the incoming one.
            """
            # Materialise once: the batch is walked several times below.
            posts = list(posts)
            tagnames = set(chain.from_iterable(post['tags'] for post in posts))
            tags = {name: Tag.get_or_add_bi_name(name) for name in tagnames}

            def inner(items):
                """Yield the existing or newly created post for each dict."""
                for item in items:
                    entry = Entry.get_or_add_bi_md5(md5=item['md5'], ctime=item['ctime'])
                    # Tags are linked separately below, not via from_dict.
                    fields = {k: v for k, v in item.items() if k != 'tags'}
                    try:
                        # Only the lookup may legitimately raise NoResultFound.
                        post = (
                            cls.query
                            .filter_by(post_url=fields['post_url'])
                            .options(db.joinedload(cls.entry))
                            .one()
                        )
                    except NoResultFound:
                        post = cls(entry=entry)
                        post.from_dict(fields)
                        db.session.add(post)
                    else:
                        if post.entry.md5 != fields['md5']:
                            # Report the value that was actually compared.
                            raise Exception(
                                'md5 changed %s -> %s' % (post.entry.md5, fields['md5'])
                            )
                        post.from_dict(fields)
                    yield post

            flushed = False
            seen = set()
            # list(...) runs the generator to completion first, so every post
            # has been created or updated before the first flush below.
            for kargs, post in zip(posts, list(inner(posts))):
                if post.id is None:
                    if not flushed:
                        # One flush for the whole batch is enough.
                        db.session.flush()
                        flushed = True
                    # NOTE(review): presumably forces a reload of the
                    # database-assigned id on next access -- confirm.
                    db.session.expire(post, ['id'])
                else:
                    # Skip posts whose id was already handled in this batch.
                    if post.id in seen:
                        continue
                    seen.add(post.id)
                for name in set(kargs['tags']):
                    tag = tags[name]
                    # NOTE(review): merge() rather than add() -- presumably so
                    # an existing link row is reused; confirm against Tagged.
                    db.session.merge(Tagged(post_id=post.id, tag_id=tag.id, entry_id=post.entry.id))
示例#2
0
文件: tasks.py 项目: Answeror/aip
def _update_images(begin=None, limit=65536):
    """Fetch posts from every source concurrently and store them in one batch.

    Each source is built by calling a factory from ``makesources()`` with
    ``dict``; ``fetch_posts(begin, limit, source)`` is then run for every
    source on a thread pool and the results are concatenated before being
    handed to ``current_app.store.Post.puts``. Timing for both phases is
    logged.

    Args:
        begin: Passed through unchanged as the first argument of
            ``fetch_posts``.
        limit: Passed through unchanged as the second argument of
            ``fetch_posts``.
    """
    start = time()
    sources = [make(dict) for make in makesources()]

    from concurrent.futures import ThreadPoolExecutor as Ex
    # ThreadPoolExecutor raises ValueError for max_workers <= 0, so keep at
    # least one worker; with no sources the map simply yields nothing.
    with Ex(max(len(sources), 1)) as ex:
        fetch = partial(fetch_posts, begin, limit)
        posts = list(chain.from_iterable(ex.map(fetch, sources)))

    log.info(
        'fetch posts done, {} fetched, take {}',
        len(posts),
        time() - start
    )
    start = time()
    current_app.store.Post.puts(posts=posts)
    log.info('put posts done, take {}', time() - start)