Пример #1
0
    def test_addNewWorks(self):
        """Exercise ``Crawler.addNewWorks`` against two small html pages.

        Both scenarios run on the same crawler instance:

        1. The anchors of a first page (one absolute href, one relative
           href) are added, and the work list is compared with the two
           expected absolute urls.
        2. ``WLM.getWork()`` is called once, the anchors of a second page
           are added, and the work list is compared again.
        """
        base_url = 'http://www.testurl.com'
        crawler = Crawler()

        # Scenario 1: page with an absolute link and a relative link.
        first_page = ''.join((
            '<!DOCTYPE html>',
            '<html>',
            '<head>',
            '    <title>aaa</title>',
            '</head>',
            '<body>',
            '    <div>',
            '        <a href="http://www.testurl.com/link1"></a>',
            '        <a href="/link2"></a>',
            '    </div>',
            '</body>',
            '</html>',
        ))
        first_soup = BeautifulSoup(first_page, 'html.parser')
        crawler.addNewWorks(base_url, first_soup)
        self.assertTrue(crawler.WLM.workExists())
        self.assertEqual(
            crawler.WLM.records,
            deque([
                'http://www.testurl.com/link1',
                'http://www.testurl.com/link2',
            ]),
        )

        # Scenario 2: call getWork() once, then add the links of a second
        # page; the expected records no longer contain link1.
        crawler.WLM.getWork()
        second_page = ''.join((
            '<!DOCTYPE html>',
            '<html>',
            '<head>',
            '    <title>aaa</title>',
            '</head>',
            '<body>',
            '    <div>',
            '        <a href="/link3"></a>',
            '    </div>',
            '</body>',
            '</html>',
        ))
        second_soup = BeautifulSoup(second_page, 'html.parser')
        crawler.addNewWorks(base_url, second_soup)
        self.assertTrue(crawler.WLM.workExists())
        self.assertEqual(
            crawler.WLM.records,
            deque([
                'http://www.testurl.com/link2',
                'http://www.testurl.com/link3',
            ]),
        )