def main(notify):
    """Refresh every stored Subreddit row from reddit.

    Rows are visited in ascending ``scraped_time`` order. Each one is updated
    from the praw object for its name; when praw raises an HTTP error or
    ``InvalidSubreddit`` the error is printed and ``touch()`` is called on
    the row instead. Work is committed every 10 rows and once more at the end.

    Args:
        notify: Callable taking one message string, or ``None`` to disable
            progress messages. It receives a start message and then a
            progress message every 2000 rows.
    """
    session = Session()
    total = session.query(Subreddit).count()
    # The in-loop progress report already tolerates notify=None; the start
    # message has to tolerate it as well or the run dies before doing any work.
    if notify is not None:
        notify("starting update of %d subs" % total)

    query = session.query(Subreddit).order_by("scraped_time asc")
    dbi = DBIterator(query=query, use_offset=None)
    for count, subreddit in enumerate(dbi.results_iter(), 1):
        try:
            # The subreddit name is the third '/'-separated piece of the
            # stored url (presumably urls look like "/r/<name>/" - confirm).
            name = subreddit.url.split('/')[2]
            subreddit.update_from_praw(r.get_subreddit(name))
        except (praw.requests.exceptions.HTTPError,
                praw.errors.InvalidSubreddit) as e:
            # Written so the same text is emitted on Python 2 and Python 3.
            print("ERROR %s" % str(e))
            # NOTE(review): touch() presumably bumps scraped_time so a failing
            # row does not stay at the front of the queue - confirm on the model.
            subreddit.touch()
        session.add(subreddit)

        if count % 2000 == 0 and notify is not None:
            notify("at %d of %d" % (count, total))
        # Commit in small batches so a crash loses at most a few rows of work.
        if count % 10 == 0:
            session.commit()

    # Flush whatever is left over from the last partial batch.
    session.commit()
def get_subreddit(display_name):
    """Look up a subreddit through praw and return its raw JSON data.

    Returns:
        The praw object's ``_json_data`` on success; ``None`` when praw raises
        ``InvalidSubreddit`` or an HTTP error for which
        ``Util.is_400_exception`` is true; ``False`` for any other HTTP error.
    """
    try:
        sub = r.get_subreddit(display_name)
        # Reading an attribute forces praw to load the subreddit, so that a
        # bad name fails here, inside the try.
        sub.title
        payload = sub._json_data
    except praw.errors.InvalidSubreddit:
        payload = None
    except praw.requests.exceptions.HTTPError as http_error:
        payload = None if Util.is_400_exception(http_error) else False
    return payload
def main(notify):
    """Import the top posts of r/all into the database.

    Requests up to 3000 top posts from the "all" subreddit. For each post the
    matching ``Post`` row is fetched or created and updated from the praw
    object, the ``Util`` helpers are called for its author and its subreddit,
    and the row is added to the session and committed.

    Args:
        notify: Callable taking one message string. It is called with the
            initial Post count before the import and with the number of
            rows added afterwards.
    """
    session = Session()
    top_posts = r.get_subreddit("all").get_top(limit=3000)
    start = session.query(Post).count()
    notify("Getting posts, initial count: %d" % start)

    for post in top_posts:
        p = Post.get_or_create(session, post.id)
        p.update_from_praw(post)
        # NOTE(review): patch_author presumably normalises a missing/deleted
        # author into a usable name - confirm in Util.
        author_name = Util.patch_author(post.author)
        Util.update_user(User, p, session, author_name)
        Util.update_subreddit(Subreddit, p, session,
                              post.subreddit.display_name)
        session.add(p)
        # Commit per post so already-imported posts are kept if a later
        # fetch from the listing fails.
        session.commit()

    # Difference in row count, so posts that already existed are not counted.
    diff = session.query(Post).count() - start
    notify("Added %d posts" % diff)
def get_submissions(subreddit_name, limit=50):
    """Return the raw JSON data of a subreddit's top submissions.

    Args:
        subreddit_name: Name passed to ``r.get_subreddit``.
        limit: Value forwarded as ``limit`` to ``get_top`` (default 50).

    Returns:
        A list holding the ``_json_data`` of each submission, in the order
        the listing yields them.
    """
    top_listing = r.get_subreddit(subreddit_name).get_top(limit=limit)
    payloads = []
    for submission in top_listing:
        payloads.append(submission._json_data)
    return payloads