Пример #1
0
	def testSmallCSVFile(self):
		"""Write a small CSV from fetched archive data and sanity-check it.

		The JSON for one /x/ thread (23732801) and for index page 1 of /x/
		is merged into a single dict, converted to posts with
		``FourPlebsAPI_Post.from_post_json`` and written out through
		``CSVPostWriter.write_posts_to_csv``. The resulting file is then
		read back twice: once to assert that every line contains a comma,
		and once to run ``ensure_csv_has_no_empty_fields`` on it.
		"""
		csv_path = 'out/testcase-output-small-example.csv'

		# Later updates replace earlier entries that share a key, so the
		# thread (http://archive.4plebs.org/x/thread/23732801/) goes in
		# first and the first index page of /x/ second.
		merged = {}
		merged.update(**httpGET_json(gen_thread_api_url('x', 23732801)))
		merged.update(**httpGET_json(gen_index_api_url('x', 1)))

		posts = FourPlebsAPI_Post.from_post_json(merged)
		CSVPostWriter.write_posts_to_csv(posts, csv_path, ALL_CONTENT_FLAGGERS)

		# Every line of the CSV is expected to contain at least one comma.
		with open(csv_path, 'r') as csv_file:
			for line_no, row in enumerate(csv_file):
				print("line {}".format(line_no))
				self.assertIn(',', row)

		# Second pass over a fresh handle for the empty-field check.
		# NOTE(review): the meaning of count=4 is defined by the helper,
		# which is not visible here -- confirm against its definition.
		with open(csv_path, 'r') as csv_file:
			self.ensure_csv_has_no_empty_fields(csv_file, count=4)
def gather_range_with_boards(start: int, end: int,
                             boards: List[str]) -> List[FourPlebsAPI_Post]:
    """Collect posts from the index pages of several boards.

    Pages are visited in ``range(start, end)`` order, so ``end`` itself is
    not requested. For each page, every board in ``boards`` is fetched in
    turn and its JSON is merged into one dict; a later response replaces
    an earlier entry stored under the same key. A progress line is printed
    after each fetch.

    Args:
        start: First page number to request.
        end: Page number at which to stop (exclusive).
        boards: Board names, e.g. ``'pol'`` or ``'x'``.

    Returns:
        Whatever ``FourPlebsAPI_Post.from_post_json`` produces for the
        merged JSON dict.
    """
    merged = {}

    for page in range(start, end):
        for name in boards:
            page_json = httpGET_json(gen_index_api_url(name, page))
            merged.update(**page_json)
            print("Page {} of /{}/".format(page, name))

    return FourPlebsAPI_Post.from_post_json(merged)
Пример #3
0
def generate_large_example_csv(page_start=1, page_end=20, boards=('pol', 'x')):
    """Fetch many index pages and write them to 'out/post-output-large.csv'.

    For each page in ``range(page_start, page_end)`` (``page_end`` is
    excluded), the index page of every board in ``boards`` is fetched and
    merged into one dict; a later response replaces an earlier entry
    stored under the same key. The merged JSON is converted with
    ``FourPlebsAPI_Post.from_post_json`` and written out with
    ``CSVPostWriter.write_posts_to_csv`` using ``ALL_CONTENT_FLAGGERS``.

    Args:
        page_start: First page number to request.
        page_end: Page number at which to stop (exclusive).
        boards: Iterable of board names. The default is an immutable tuple
            rather than a list, so the default object shared between calls
            can never be mutated by accident.
    """
    results = {}

    for i in range(page_start, page_end):
        for board in boards:
            results.update(**httpGET_json(gen_index_api_url(board, i)))

        # Progress is reported once per page, after all boards are fetched.
        print("{}th page...".format(i))

    postList = FourPlebsAPI_Post.from_post_json(results)

    CSVPostWriter.write_posts_to_csv(postList, 'out/post-output-large.csv',
                                     ALL_CONTENT_FLAGGERS)
def generate_small_example_csv():
    """Fetch a fixed set of pages and write 'out/post-output-small-example.csv'.

    Sources, in fetch order:

    1. the /x/ thread 23732801
       (http://archive.4plebs.org/x/thread/23732801/),
    2. index pages 1 through 9 of /pol/,
    3. index page 1 of /x/.

    Each response is merged into one dict, so a later response replaces an
    earlier entry stored under the same key. The merged JSON is converted
    with ``FourPlebsAPI_Post.from_post_json`` and written out with
    ``CSVPostWriter.write_posts_to_csv`` using ``ALL_CONTENT_FLAGGERS``.
    """

    def _api_urls():
        # Yield URLs lazily so each one is built right before it is fetched.
        yield gen_thread_api_url('x', 23732801)
        # range(1, 10) covers pages 1-9.
        # NOTE(review): an earlier comment here said "page 1-10" -- confirm
        # whether page 10 was meant to be included.
        for page in range(1, 10):
            yield gen_index_api_url('pol', page)
        yield gen_index_api_url('x', 1)

    merged = {}
    for url in _api_urls():
        merged.update(**httpGET_json(url))

    # Turn the merged json dict into a list of Post objects.
    posts = FourPlebsAPI_Post.from_post_json(merged)

    CSVPostWriter.write_posts_to_csv(
        posts,
        'out/post-output-small-example.csv',
        ALL_CONTENT_FLAGGERS,
    )