def main():
    """Feed the spider with recommendation URLs for not-yet-fetched users.

    Builds a generator over every ``DoubanUser`` record and hands it to
    ``spider``. Users for which ``DoubanFetched.get`` returns a truthy
    value are skipped.
    """
    from zweb.orm import ormiter
    from douban_spider import spider

    def url_list():
        """Yield ``(douban_recommendation, url, 1)`` for each pending user."""
        for user in ormiter(DoubanUser):
            user_id = user.id
            if not DoubanFetched.get(user_id):
                yield douban_recommendation, URL_REC % user_id, 1
                # Runs only once the consumer resumes the generator, i.e.
                # after the task above has been handed over.
                DoubanFetched.get_or_create(id=user_id).save()

    spider(url_list())
def main():
    """Feed the spider with recommendation URLs for not-yet-fetched users.

    Builds a generator over every ``DoubanUser`` record and hands it to
    ``spider``. Users for which ``DoubanFetched.get`` returns a truthy
    value are skipped.
    """
    from zweb.orm import ormiter
    from douban_spider import spider

    def url_list():
        """Yield ``(douban_recommendation, url, 1)`` for each pending user."""
        for user in ormiter(DoubanUser):
            user_id = user.id
            if not DoubanFetched.get(user_id):
                yield douban_recommendation, URL_REC % user_id, 1
                # Runs only once the consumer resumes the generator, i.e.
                # after the task above has been handed over.
                DoubanFetched.get_or_create(id=user_id).save()

    spider(url_list())
feed = DoubanFeed.get(i.id) if feed.cid == CID_DOUBAN_FEED_TOPIC: group_url = feed.topic_id or i.topic group = DoubanGroup.by_url(group_url) if not group: if not group_url in exist: exist.add(group_url) yield parse_group_htm, 'http://www.douban.com/group/%s/' % group_url else: topic_id = group user_id = feed.user_id or i.owner if not (user_id and str(user_id).isdigit()): user_id = DoubanUser.by_url(user_id) if topic_id is not None and user_id: feed.topic_id = topic_id feed.user_id = user_id feed.save() i.delete() if __name__ == '__main__': pass from douban_spider import spider spider(main())
feed = DoubanFeed.get(i.id) if feed.cid == CID_DOUBAN_FEED_TOPIC: group_url = feed.topic_id or i.topic group = DoubanGroup.by_url(group_url) if not group: if not group_url in exist: exist.add(group_url) yield parse_group_htm, "http://www.douban.com/group/%s/" % group_url else: topic_id = group user_id = feed.user_id or i.owner if not (user_id and str(user_id).isdigit()): user_id = DoubanUser.by_url(user_id) if topic_id is not None and user_id: feed.topic_id = topic_id feed.user_id = user_id feed.save() i.delete() if __name__ == "__main__": pass from douban_spider import spider spider(main())