def testSmallCSVFile(self):
    results = {}

    # Add a specific thread, http://archive.4plebs.org/x/thread/23732801/
    results.update(**httpGET_json(gen_thread_api_url('x', 23732801)))

    # First page of /x/
    results.update(**httpGET_json(gen_index_api_url('x', 1)))

    # Turn that json dict into a list of Post objects
    postList = FourPlebsAPI_Post.from_post_json(results)

    output_csv_filepath = 'out/testcase-output-small-example.csv'

    CSVPostWriter.write_posts_to_csv(postList, output_csv_filepath, ALL_CONTENT_FLAGGERS)

    # All lines in this CSV should contain commas!
    with open(output_csv_filepath, 'r') as f:
        for i, line in enumerate(f):
            print("line {}".format(i))
            self.assertIn(',', line)

    with open(output_csv_filepath, 'r') as f:
        self.ensure_csv_has_no_empty_fields(f, count=4)
def gather_range_with_boards(start: int, end: int, boards: List[str]) -> List[FourPlebsAPI_Post]:
    """Given a page range (end is exclusive), gather posts from each of the given boards."""
    results = {}
    for i in range(start, end):
        for board in boards:
            results.update(**httpGET_json(gen_index_api_url(board, i)))
            print("Page {} of /{}/".format(i, board))
    return FourPlebsAPI_Post.from_post_json(results)
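# A minimal usage sketch for gather_range_with_boards. It assumes this module
# already has CSVPostWriter and ALL_CONTENT_FLAGGERS in scope (as the other
# helpers here do); the page range, boards, and output path are illustrative only.
def example_gather_and_write():
    posts = gather_range_with_boards(1, 3, ['x', 'pol'])  # pages 1-2 of /x/ and /pol/
    CSVPostWriter.write_posts_to_csv(posts, 'out/example-range-output.csv', ALL_CONTENT_FLAGGERS)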
def grab_messages(thread: str):
    """Get the title, selftext, and comment bodies from the given Reddit thread permalink."""
    # Strip the trailing slash from the permalink and request the thread's JSON representation.
    thread_json = bowserUtils.httpGET_json(('https://reddit.com' + thread)[0:-1] + '.json')

    messages = []

    # The first listing holds the submission itself: combine its title and selftext.
    submission = thread_json[0]['data']['children'][0]['data']
    messages.append(submission['title'] + ':' + submission['selftext'])

    # The second listing holds the top-level comments.
    for comment in thread_json[1]['data']['children']:
        messages.append(comment['data']['body'])

    return messages
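# A hypothetical call to grab_messages. The permalink below is a placeholder,
# not a real thread; grab_messages expects the path portion of a Reddit thread
# URL with a trailing slash (e.g. as returned in a listing's 'permalink' field).
def example_grab_messages():
    permalink = '/r/test/comments/abc123/example_thread/'  # placeholder permalink
    for message in grab_messages(permalink):
        print(message)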
def generate_small_example_csv():
    results = {}

    # Add a specific thread, http://archive.4plebs.org/x/thread/23732801/
    results.update(**httpGET_json(gen_thread_api_url('x', 23732801)))

    # Get the posts from pages 1-9 of /pol/
    for i in range(1, 10):
        results.update(**httpGET_json(gen_index_api_url('pol', i)))

    # Add on the posts from page 1 of /x/
    results.update(**httpGET_json(gen_index_api_url('x', 1)))

    # Turn that json dict into a list of Post objects
    postList = FourPlebsAPI_Post.from_post_json(results)

    # # For all posts from the two index pages (/x/, /pol/)
    # for post in postList:
    #     print(post)

    CSVPostWriter.write_posts_to_csv(postList, 'out/post-output-small-example.csv', ALL_CONTENT_FLAGGERS)
def grab_subreddit(name):
    """Get the JSON listing of a given subreddit."""
    return bowserUtils.httpGET_json('https://www.reddit.com/r/' + name + '.json')
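# A sketch that ties grab_subreddit and grab_messages together: pull the
# listing of a subreddit, then fetch the messages of each thread via its
# 'permalink' field. Assumes the standard Reddit listing JSON shape
# (listing['data']['children'][i]['data']['permalink']); the subreddit name
# is illustrative only.
def example_grab_subreddit_messages(name='test'):
    listing = grab_subreddit(name)
    all_messages = []
    for child in listing['data']['children']:
        all_messages.append(grab_messages(child['data']['permalink']))
    return all_messages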