def main():
    """Hand the spider one crawl task per not-yet-fetched ``DoubanUser``.

    Tasks are produced lazily by the nested generator, so the bookkeeping
    that follows each ``yield`` only runs once the consumer asks for the
    next task.
    """
    from zweb.orm import ormiter
    from douban_spider import spider

    def url_list():
        # Walk every object produced by ormiter(DoubanUser) and emit a
        # (handler, url, 1) tuple for those with no DoubanFetched entry.
        for user in ormiter(DoubanUser):
            user_id = user.id
            if not DoubanFetched.get(user_id):
                yield douban_recommendation, URL_REC % user_id, 1
                # Reached only when the generator is resumed after the
                # task above was handed out: record the id as fetched.
                fetched = DoubanFetched.get_or_create(id=user_id)
                fetched.save()

    spider(url_list())
# Example #2
# 0
def main():
    """Run the spider over a lazily generated list of per-user tasks.

    Each object coming out of ``ormiter(DoubanUser)`` whose id has no
    ``DoubanFetched`` entry contributes one task; ids that already have
    an entry are skipped.
    """
    from zweb.orm import ormiter
    from douban_spider import spider

    def url_list():
        for row in ormiter(DoubanUser):
            uid = row.id
            already_fetched = DoubanFetched.get(uid)
            if already_fetched:
                continue
            # Task tuple: handler, formatted URL, and the constant 1.
            task = (douban_recommendation, URL_REC % uid, 1)
            yield task
            # Executed when the generator is resumed, i.e. after the
            # consumer has taken the task: persist the fetched marker.
            DoubanFetched.get_or_create(id=uid).save()

    spider(url_list())
# Example #3
# 0
        feed = DoubanFeed.get(i.id)
        if feed.cid == CID_DOUBAN_FEED_TOPIC:
            group_url = feed.topic_id or i.topic
            group = DoubanGroup.by_url(group_url)
            if not group:
                if not group_url in exist:
                    exist.add(group_url)
                    yield parse_group_htm, 'http://www.douban.com/group/%s/' % group_url

            else:
                topic_id = group

        user_id = feed.user_id or i.owner
        if not (user_id and str(user_id).isdigit()):
            user_id = DoubanUser.by_url(user_id)

        if topic_id is not None and user_id:
            feed.topic_id = topic_id
            feed.user_id = user_id
            feed.save()
            i.delete()


if __name__ == '__main__':
    # Script entry point: import the spider runner only when executed
    # directly, then pass it whatever main() returns.
    from douban_spider import spider

    spider(main())
        feed = DoubanFeed.get(i.id)
        if feed.cid == CID_DOUBAN_FEED_TOPIC:
            group_url = feed.topic_id or i.topic
            group = DoubanGroup.by_url(group_url)
            if not group:
                if not group_url in exist:
                    exist.add(group_url)
                    yield parse_group_htm, "http://www.douban.com/group/%s/" % group_url

            else:
                topic_id = group

        user_id = feed.user_id or i.owner
        if not (user_id and str(user_id).isdigit()):
            user_id = DoubanUser.by_url(user_id)

        if topic_id is not None and user_id:
            feed.topic_id = topic_id
            feed.user_id = user_id
            feed.save()
            i.delete()


if __name__ == "__main__":
    # Executed only when run as a script: load the spider runner and
    # feed it the result of main().
    from douban_spider import spider

    spider(main())