class RedditCounter:
    """Page through a subreddit's newest submissions and tally their tags.

    Each submission title is passed to ``CounterUtil.extract_tag``; the
    result is handed to a ``CounterTracker`` and recorded in a
    ``ProgressSaver``.  Paging continues until a batch comes back empty,
    ``max_posts`` submissions have been processed, or the most recently
    processed submission is not newer than ``earliest_date_to_get``.

    Both limits are evaluated between batches, so the batch that crosses a
    limit is still processed in full.
    """

    def __init__(self, subreddit, earliest_date_to_get=None, max_posts=100,
                 user_agent=CounterUtil.USER_AGENT):
        """Set up the Reddit client and the bookkeeping state.

        Args:
            subreddit: Name of the subreddit to read.
            earliest_date_to_get: ``datetime`` cut-off; paging stops once the
                last processed submission is not newer than this.  ``None``
                selects 1 January 1985.
            max_posts: Number of submissions after which paging stops.
                ``None`` or a negative value means "no limit".
            user_agent: User-agent string handed to ``praw.Reddit``.
        """
        self.reddit_client = praw.Reddit(user_agent=user_agent)
        self.subreddit = subreddit

        if earliest_date_to_get is None:
            # Month and day are 1-based: datetime(1985, 0, 0) raises
            # ValueError, so the default has to be 1 January.
            self.earliest_date_to_get = datetime(1985, 1, 1)
        else:
            self.earliest_date_to_get = earliest_date_to_get

        if max_posts is None or max_posts < 0:
            # Infinity keeps the "posts_retrieved < max_posts" test uniform.
            self.max_posts = float('inf')
        else:
            self.max_posts = max_posts

        self.posts_retrieved = 0
        self.last_submission_retrieved = None
        # Legacy alias of last_submission_retrieved; run() keeps both in
        # sync.  Initialised here so the attribute always exists.
        self.last_post_retrieved = None
        # Pause between API requests (presumably to respect Reddit's rate
        # limit -- confirm against the API rules in force).
        self.min_sleep_secs = 2.01
        self.tracker = CounterTracker()
        self.saver = ProgressSaver()

    def process_submission(self, submission):
        """Extract the tag from one submission and record it.

        The tag is added to the tracker together with the submission's
        ``created_utc`` value and stored in the saver as a
        ``{created_utc: tag}`` record.
        """
        sub_tag = CounterUtil.extract_tag(submission.title)
        self.tracker.add(sub_tag, submission.created_utc)
        self.saver.add_record({submission.created_utc: sub_tag})

    def get_submissions(self, after_submission=None):
        """Fetch one batch of the subreddit's newest submissions as a list.

        Args:
            after_submission: When given, the request carries an ``after``
                parameter built from ``CounterUtil.LINK_PREFIX`` and this
                submission's id, so the batch starts after it.

        Returns:
            A list of at most ``CounterUtil.MAX_POSTS_PER_API_REQUEST``
            submissions.
        """
        params = {}
        if after_submission is not None:
            params = {'after': CounterUtil.LINK_PREFIX + after_submission.id}

        subreddit = self.reddit_client.get_subreddit(self.subreddit)
        # Materialise the result so callers can take len() of it.
        return list(subreddit.get_new(
            limit=CounterUtil.MAX_POSTS_PER_API_REQUEST,
            after_field='id',
            params=params))

    def print_progress(self):
        """Print the running total and the timestamp of the last submission."""
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Retrieved ' + str(self.posts_retrieved) + ' submissions so far')

        last = self.last_submission_retrieved
        if last is None:
            # Nothing processed yet, so there is no timestamp to report.
            return
        print('Earliest date and time retrieved so far: '
              + str(datetime.fromtimestamp(last.created_utc)))

    def _should_keep_running(self):
        """Return True while neither the post limit nor the date cut-off is hit."""
        if self.posts_retrieved >= self.max_posts:
            return False

        last = self.last_submission_retrieved
        if last is None:
            # Nothing processed yet: the date cut-off cannot apply.
            return True
        return datetime.fromtimestamp(last.created_utc) > self.earliest_date_to_get

    def run(self):
        """Process batches until a stop condition is met, then print top tags.

        After every batch the progress is printed, the saver is written to
        ``progress.json`` and the loop sleeps for ``min_sleep_secs`` before
        requesting the next batch.
        """
        submissions = self.get_submissions()
        while (submissions is not None and len(submissions) > 0
               and self._should_keep_running()):
            for submission in submissions:
                self.process_submission(submission)
                self.last_post_retrieved = submission  # legacy alias
                self.last_submission_retrieved = submission
                self.posts_retrieved += 1

            self.print_progress()
            self.saver.write_out('progress.json')
            time.sleep(self.min_sleep_secs)

            submissions = self.get_submissions(self.last_submission_retrieved)
            print('Submissions result length: ' + str(len(submissions))
                  + ', Should keep running? ' + str(self._should_keep_running()))

        self.tracker.print_top_tags()
def __init__(self, subreddit, earliest_date_to_get=None, max_posts=100,
             user_agent=CounterUtil.USER_AGENT):
    """Set up the Reddit client and the bookkeeping state.

    Args:
        subreddit: Name of the subreddit to read.
        earliest_date_to_get: ``datetime`` cut-off stored on the instance.
            ``None`` selects 1 January 1985.
        max_posts: Upper bound stored on the instance.  ``None`` or a
            negative value is stored as infinity, i.e. "no limit".
        user_agent: User-agent string handed to ``praw.Reddit``.
    """
    self.reddit_client = praw.Reddit(user_agent=user_agent)
    self.subreddit = subreddit

    if earliest_date_to_get is None:
        # Month and day are 1-based: datetime(1985, 0, 0) raises ValueError,
        # so the default has to be 1 January.
        self.earliest_date_to_get = datetime(1985, 1, 1)
    else:
        self.earliest_date_to_get = earliest_date_to_get

    if max_posts is None or max_posts < 0:
        self.max_posts = float('inf')
    else:
        self.max_posts = max_posts

    self.posts_retrieved = 0
    self.last_submission_retrieved = None
    # RedditCounter._should_keep_running reads this attribute, so make sure
    # it exists from construction onwards.
    self.last_post_retrieved = None
    # Pause between API requests (presumably for rate limiting -- confirm).
    self.min_sleep_secs = 2.01
    self.tracker = CounterTracker()
    self.saver = ProgressSaver()
def test_get_mean_singel_value(self):
    """With a single reading of 50 inserted, get_mean() must equal 50.0."""
    tracker = TempTracker()
    tracker.insert(50)
    mean = tracker.get_mean()
    self.assertEqual(mean, 50.0)
def test_get_mean_mulit_value(self):
    """After inserting 0, 5, 10 and 5, get_mean() must equal 5.0."""
    tracker = TempTracker()
    readings = [0, 5, 10, 5]
    for reading in readings:
        tracker.insert(reading)
    mean = tracker.get_mean()
    self.assertEqual(mean, 5.0)
def test_mode_multiple_modes(self):
    """When 4 and 5 both occur twice, get_mode() may return either one."""
    tracker = TempTracker()
    for reading in [5, 0, 1, 2, 3, 4, 5, 4]:
        tracker.insert(reading)
    # assertIn reports the offending value on failure, unlike assertTrue.
    self.assertIn(tracker.get_mode(), [4, 5])
def test_no_mean_val(self):
    """get_mean() on a tracker with no readings must return None."""
    tracker = TempTracker()
    # Identity check: only the None singleton is acceptable here.
    self.assertIsNone(tracker.get_mean())
def test_mode_one_single_value(self):
    """With only the reading 2 inserted, get_mode() must equal 2."""
    tracker = TempTracker()
    tracker.insert(2)
    mode = tracker.get_mode()
    self.assertEqual(mode, 2)
def test_mode_one_multiple_value(self):
    """4 is the only repeated reading, so get_mode() must equal 4."""
    tracker = TempTracker()
    readings = [0, 1, 2, 3, 4, 5, 4]
    for reading in readings:
        tracker.insert(reading)
    mode = tracker.get_mode()
    self.assertEqual(mode, 4)
def test_mode_returns_none(self):
    """get_mode() on a tracker with no readings must return None."""
    tracker = TempTracker()
    # Identity check: only the None singleton is acceptable here.
    self.assertIsNone(tracker.get_mode())
def test_min(self):
    """get_min() must equal 0, the smallest reading, inserted mid-sequence."""
    tracker = TempTracker()
    readings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 0, 5, 6]
    for reading in readings:
        tracker.insert(reading)
    lowest = tracker.get_min()
    self.assertEqual(lowest, 0)
def test_max(self):
    """get_max() must equal 9, the largest reading, inserted mid-sequence."""
    tracker = TempTracker()
    readings = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6]
    for reading in readings:
        tracker.insert(reading)
    highest = tracker.get_max()
    self.assertEqual(highest, 9)
def test_multiple_inserts(self):
    """Inserting N readings must leave N entries in _get_temps()."""
    insert_count = 10
    tracker = TempTracker()
    for reading in range(insert_count):
        tracker.insert(reading)
    # Compare against the same count that drove the loop so the two values
    # cannot drift apart if the count is changed.
    self.assertEqual(len(tracker._get_temps()), insert_count)
def test_insert(self):
    """After inserting 25, _get_temps() must equal the list [25]."""
    tracker = TempTracker()
    tracker.insert(25)
    stored = tracker._get_temps()
    self.assertEqual(stored, [25])