Exemplo n.º 1
0
    def test_parse_question_container(self):
        """Check the container data parsed from a question page.

        The expected result holds two entries: the 'author' of the
        question and the user it was 'updated_by', i.e. the case where
        the question was edited by someone other than its creator.
        """
        parser = AskbotParser()

        raw_html = read_file(
            'data/askbot/html_26830_comments_question_openstack.html')

        parsed_container = parser.parse_question_container(raw_html)

        # User who created the question.
        author = {
            'badges':
            'Ignacio Mulas has 4 gold badges, 6 silver badges and 9 bronze badges',
            'reputation': '111',
            'username': '******',
            'id': '5000'
        }
        # User who last edited the question; this entry also has a website.
        editor = {
            'website': 'http://maffulli.net/',
            'badges':
            'smaffulli has 36 gold badges, 67 silver badges and 100 bronze badges',
            'reputation': '6898',
            'username': '******',
            'id': '9'
        }

        self.assertEqual(parsed_container,
                         {'author': author, 'updated_by': editor})
Exemplo n.º 2
0
    def test_parse_user_info(self):
        """Check the parsing of the user info block of a post.

        Two cases are covered: a post that is a wiki, where a fixed
        message is returned instead of user data, and a post written by
        a user, where optional fields such as country or website are
        included when available.
        """
        # Case 1: the question is a wiki post that has not been updated.
        wiki_html = read_file('data/askbot/askbot_question_multipage_1.html')
        wiki_soup = bs4.BeautifulSoup(wiki_html, "html.parser")
        question_div = wiki_soup.select("div.js-question")[0]
        created_info = question_div.select("div.post-update-info")[0]
        wiki_author = AskbotParser.parse_user_info(created_info)
        self.assertEqual(wiki_author, "This post is a wiki")

        # Case 2: an answer whose author has both country and website.
        answer_html = read_file('data/askbot/html_country_and_website.html')
        answer_soup = bs4.BeautifulSoup(answer_html, "html.parser")
        first_answer = answer_soup.select("div.answer")[0]
        post_body = first_answer.select("div.post-body")[0]
        update_info = post_body.select("div.post-update-info")[0]
        author = AskbotParser.parse_user_info(update_info)

        expected_fields = (
            ('id', "1"),
            ('badges', "Evgeny has 56 gold badges, 98 silver badges and 212 bronze badges"),
            ('reputation', "14023"),
            ('username', "Evgeny"),
            ('website', "http://askbot.org/"),
            ('country', "Chile"),
        )
        for field, value in expected_fields:
            self.assertEqual(author[field], value)
Exemplo n.º 3
0
    def test_parse_answers(self):
        """Check that all the answers of a question are parsed (pagination included).

        The fixture is expected to yield ten answers; for each one the
        'id', 'score' and 'added_at' values are compared, in order.
        """
        # One row per expected answer: (id, score, added_at).
        expected_answers = (
            ('24427', '0', '1372894082.0'),
            ('24426', '0', '1372475606.0'),
            ('24425', '0', '1365772426.0'),
            ('24424', '0', '1365766666.0'),
            ('24423', '0', '1365762818.0'),
            ('24419', '0', '1365715423.0'),
            ('24418', '0', '1365687337.0'),
            ('24417', '0', '1364970027.0'),
            ('24416', '0', '1364965468.0'),
            ('24414', '0', '1364453025.0'),
        )

        parser = AskbotParser()
        raw_html = read_file('data/askbot/html_24396_multipage_openstack.html')

        answers = parser.parse_answers(raw_html)

        # Check the size first so the per-answer lookups below stay in range.
        self.assertEqual(len(answers), 10)

        for position, (answer_id, score, added_at) in enumerate(expected_answers):
            answer = answers[position]
            self.assertEqual(answer['id'], answer_id)
            self.assertEqual(answer['score'], score)
            self.assertEqual(answer['added_at'], added_at)
Exemplo n.º 4
0
    def test_parse_number_of_html_pages(self):
        """Check the number of HTML pages needed to get all the answers of a question.

        The multipage fixture is expected to report four pages.
        """
        raw_html = read_file('data/askbot/html_24396_multipage_openstack.html')

        page_count = AskbotParser.parse_number_of_html_pages(raw_html)

        self.assertEqual(page_count, 4)