Example #1
	def testSmallCSVFile(self):
		results = {}

		# Add a specific thread, http://archive.4plebs.org/x/thread/23732801/
		results.update(**httpGET_json(gen_thread_api_url('x', 23732801)))

		# First page of /x/
		results.update(**httpGET_json(gen_index_api_url('x', 1)))

		# Turn that json dict into a list of Post objects
		postList = FourPlebsAPI_Post.from_post_json(results)

		output_csv_filepath = 'out/testcase-output-small-example.csv'
		CSVPostWriter.write_posts_to_csv(postList, output_csv_filepath, ALL_CONTENT_FLAGGERS)

		# All lines in this CSV should contain commas!
		with open(output_csv_filepath, 'r') as f:
			for i, line in enumerate(f):
				print("line {}".format(i))
				self.assertIn(',', line)

		with open(output_csv_filepath, 'r') as f:
			self.ensure_csv_has_no_empty_fields(f, count=4)
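The ensure_csv_has_no_empty_fields helper is not shown here; a minimal sketch of what such a check might look like, assuming count is the minimum number of fields expected per row (only the name and the count argument come from the call above, the body is an assumption):

import csv

	def ensure_csv_has_no_empty_fields(self, f, count=4):
		"""Hypothetical sketch: assert every row has at least `count` fields
		and that none of them are empty."""
		reader = csv.reader(f)
		for row_number, row in enumerate(reader):
			self.assertGreaterEqual(len(row), count)
			for field in row:
				self.assertNotEqual(
					field.strip(), '',
					"empty field in row {}".format(row_number))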
Example #2
def gather_range_with_boards(start: int, end: int,
                             boards: List[str]) -> List[FourPlebsAPI_Post]:
    """Given a start and end page range, gather posts from various boards."""
    results = {}

    for i in range(start, end):
        for board in boards:
            results.update(**httpGET_json(gen_index_api_url(board, i)))

            print("Page {} of /{}/".format(i, board))

    return FourPlebsAPI_Post.from_post_json(results)
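A short usage sketch for gather_range_with_boards; the board list and output path below are illustrative, not taken from the source:

# Hypothetical call: pages 1 and 2 of /x/ and /pol/ (range(1, 3) stops before 3),
# written out with the same CSVPostWriter used in the other examples.
posts = gather_range_with_boards(1, 3, ['x', 'pol'])
CSVPostWriter.write_posts_to_csv(posts, 'out/post-output-range.csv',
                                 ALL_CONTENT_FLAGGERS)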
def grab_messages(thread: str):
    """Get messages from a given thread."""
    # The thread permalink is expected to end with '/', so drop that trailing
    # character before appending '.json' to reach the thread's JSON endpoint.
    json = bowserUtils.httpGET_json(
        ('https://reddit.com' + thread)[0:-1] + '.json')

    # The first element of the response is the submission itself.
    submission = json[0]['data']['children'][0]['data']
    messages = [submission['title'] + ':' + submission['selftext']]

    # The second element holds the comment listing; collect each top-level
    # comment body.
    for comment in json[1]['data']['children']:
        messages.append(comment['data']['body'])

    return messages
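grab_messages expects thread to be a reddit permalink with a trailing slash; the [0:-1] slice drops that slash before '.json' is appended. A hypothetical call with an illustrative permalink:

# Hypothetical permalink; any submission permalink of the form
# '/r/<subreddit>/comments/<id>/<slug>/' fits the same pattern.
for message in grab_messages('/r/AskReddit/comments/abc123/example_thread/'):
    print(message)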
Example #4
def generate_small_example_csv():
    results = {}

    # Add a specific thread, http://archive.4plebs.org/x/thread/23732801/
    results.update(**httpGET_json(gen_thread_api_url('x', 23732801)))

    for i in range(1, 10):
        # Get the posts from pages 1-9 of /pol/ (range(1, 10) stops before 10)
        results.update(**httpGET_json(gen_index_api_url('pol', i)))

    # Add the posts from page 1 of /x/
    results.update(**httpGET_json(gen_index_api_url('x', 1)))

    # Turn that json dict into a list of Post objects
    postList = FourPlebsAPI_Post.from_post_json(results)

    # # For all posts from the two index pages (/x/, /pol/)
    # for post in postList:
    # 	print(post)

    CSVPostWriter.write_posts_to_csv(postList,
                                     'out/post-output-small-example.csv',
                                     ALL_CONTENT_FLAGGERS)
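httpGET_json itself is defined elsewhere in the project; a minimal sketch of what such a helper might look like, assuming it simply wraps the requests library and returns the decoded JSON body (the code below is an assumption, not the project's implementation):

import requests

def httpGET_json(url):
    """Hypothetical sketch: GET a URL and return its JSON response as a dict."""
    # A custom User-Agent is set because reddit, in particular, throttles
    # requests that use the default client string.
    response = requests.get(url, headers={'User-Agent': 'bowser-example/0.1'})
    response.raise_for_status()
    return response.json()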
def grab_subreddit(name):
    """Get JSON of a given subreddit"""
    return bowserUtils.httpGET_json('https://www.reddit.com/r/' + name +
                                    '.json')
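The two reddit helpers compose: grab_subreddit returns the listing JSON, and each entry under data.children carries the permalink that grab_messages expects. A hedged sketch of that flow (the subreddit name is illustrative):

# Hypothetical flow: fetch a subreddit listing, then pull the messages of
# each submission via its permalink.
listing = grab_subreddit('python')
for child in listing['data']['children']:
    print(grab_messages(child['data']['permalink']))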