def test_parse_question_container(self):
    """Check the container data parsed from a question page.

    The fixture covers the complete scenario: the question has an
    author and was later updated by a different user, so both
    entries are expected in the parsed result.
    """
    expected = {
        'author': {
            'badges': 'Ignacio Mulas has 4 gold badges, 6 silver badges and 9 bronze badges',
            'reputation': '111',
            'username': '******',
            'id': '5000'
        },
        'updated_by': {
            'website': 'http://maffulli.net/',
            'badges': 'smaffulli has 36 gold badges, 67 silver badges and 100 bronze badges',
            'reputation': '6898',
            'username': '******',
            'id': '9'
        }
    }

    raw_html = read_file(
        'data/askbot/html_26830_comments_question_openstack.html')
    parser = AskbotParser()
    parsed = parser.parse_question_container(raw_html)

    self.assertEqual(parsed, expected)
def test_parse_user_info(self):
    """Check the parsing of the user info block.

    The block either marks the post as a wiki or describes a user.
    For a user, optional fields such as country or website are
    included when the page provides them.
    """
    # Case 1: the question is a wiki post that has not been updated
    wiki_html = read_file('data/askbot/askbot_question_multipage_1.html')
    wiki_soup = bs4.BeautifulSoup(wiki_html, "html.parser")
    question_node = wiki_soup.select("div.js-question")[0]
    created_node = question_node.select("div.post-update-info")[0]

    wiki_author = AskbotParser.parse_user_info(created_node)
    self.assertEqual(wiki_author, "This post is a wiki")

    # Case 2: an answer whose author exposes country and website
    user_html = read_file('data/askbot/html_country_and_website.html')
    user_soup = bs4.BeautifulSoup(user_html, "html.parser")
    first_answer = user_soup.select("div.answer")[0]
    answer_body = first_answer.select("div.post-body")[0]
    info_node = answer_body.select("div.post-update-info")[0]

    user = AskbotParser.parse_user_info(info_node)
    self.assertEqual(user['id'], "1")
    self.assertEqual(user['badges'],
                     "Evgeny has 56 gold badges, 98 silver badges and 212 bronze badges")
    self.assertEqual(user['reputation'], "14023")
    self.assertEqual(user['username'], "Evgeny")
    self.assertEqual(user['website'], "http://askbot.org/")
    self.assertEqual(user['country'], "Chile")
def test_parse_answers(self):
    """Check that every answer on a question page is parsed.

    The fixture is a question whose answers are paginated; ten
    answers are expected from the page, each with a score of '0'.
    """
    # (id, added_at) of each expected answer, in the order returned
    expected_answers = [
        ('24427', '1372894082.0'),
        ('24426', '1372475606.0'),
        ('24425', '1365772426.0'),
        ('24424', '1365766666.0'),
        ('24423', '1365762818.0'),
        ('24419', '1365715423.0'),
        ('24418', '1365687337.0'),
        ('24417', '1364970027.0'),
        ('24416', '1364965468.0'),
        ('24414', '1364453025.0'),
    ]

    raw_html = read_file('data/askbot/html_24396_multipage_openstack.html')
    parser = AskbotParser()
    answers = parser.parse_answers(raw_html)

    self.assertEqual(len(answers), 10)

    for position, (answer_id, added_at) in enumerate(expected_answers):
        answer = answers[position]
        self.assertEqual(answer['id'], answer_id)
        self.assertEqual(answer['score'], '0')
        self.assertEqual(answer['added_at'], added_at)
def test_parse_number_of_html_pages(self):
    """Check the number of HTML pages needed to get all the answers of a question."""
    raw_html = read_file('data/askbot/html_24396_multipage_openstack.html')

    total_pages = AskbotParser.parse_number_of_html_pages(raw_html)

    self.assertEqual(total_pages, 4)