示例#1
0
    def testContentExample2(self):
        # Run the downloader on the local example2 page and check which
        # <img src="..."> references show up in downloader.contentResult.
        from webpage.downloader import Downloader, DownloadController

        imgTag = u'<img src="{path}"'
        pagePath = os.path.join(u'../test/webpage/example2/', u'example2.html')

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()
        downloader.start(self._path2url(pagePath), controller)

        # Images that must be referenced through the static directory name.
        presentSuffixes = (u'/image_01.png', u'/image_01_1.png', u'/image_02.png')
        for suffix in presentSuffixes:
            self.assertIn(imgTag.format(path=self._staticDirName + suffix),
                          downloader.contentResult)

        # No reference to a second copy of image_02 is expected.
        self.assertNotIn(
            imgTag.format(path=self._staticDirName + u'/image_02_1.png'),
            downloader.contentResult)
示例#2
0
    def testDownloading_favicon(self):
        # Download the local example_favicon page, then check that both
        # favicon files exist in the static directory and that the resulting
        # content refers to them through href="...".
        from webpage.downloader import Downloader, DownloadController

        hrefAttr = 'href="{path}"'
        staticDir = os.path.join(self._tempDir, self._staticDirName)
        pagePath = os.path.join('testdata/webpage/example_favicon/',
                                'example.html')

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()
        downloader.start(self._path2url(pagePath), controller)

        iconPaths = [os.path.join(staticDir, iconName)
                     for iconName in ('favicon_1.png', 'favicon_2.png')]

        self.assertTrue(os.path.exists(staticDir))
        for iconPath in iconPaths:
            self.assertTrue(os.path.exists(iconPath))

        for hrefSuffix in ('/favicon_1.png', '/favicon_2.png'):
            self.assertIn(
                hrefAttr.format(path=self._staticDirName + hrefSuffix),
                downloader.contentResult)
示例#3
0
    def testDownloading_javascript_01(self):
        # Download the local example1 page and check that the static
        # directory and the four script files exist afterwards.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(staticDir))

        for scriptName in (u'fname1.js', u'fname2.js',
                           u'fname3.js', u'fname4.js'):
            self.assertTrue(os.path.exists(os.path.join(staticDir, scriptName)))
示例#4
0
    def testDownloading_javascript_01(self):
        # After downloading the local example1 page, the static directory
        # and each of fname1.js .. fname4.js must exist on disk.
        from webpage.downloader import Downloader, DownloadController

        pageUrl = self._path2url(
            os.path.join(u'../test/webpage/example1/', u'example1.html'))

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()
        downloader.start(pageUrl, controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        scriptPaths = [
            os.path.join(staticDir, scriptName)
            for scriptName in (u'fname1.js', u'fname2.js',
                               u'fname3.js', u'fname4.js')
        ]

        self.assertTrue(os.path.exists(staticDir))
        for scriptPath in scriptPaths:
            self.assertTrue(os.path.exists(scriptPath))
示例#5
0
    def testDownloading_img_srcset_files(self):
        # Download the local example3 page and check that the static
        # directory and image_01.png .. image_04.png exist afterwards.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example3/', 'example3.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        imagePaths = [
            os.path.join(staticDir, imageName)
            for imageName in ('image_01.png', 'image_02.png',
                              'image_03.png', 'image_04.png')
        ]

        self.assertTrue(os.path.exists(staticDir))
        for imagePath in imagePaths:
            self.assertTrue(os.path.exists(imagePath))
示例#6
0
    def testDownloading_beautifulsoup(self):
        # Network test: download the BeautifulSoup documentation page, expect
        # the downloader to report success, and verify that the static
        # directory and the listed stylesheet/script files exist on disk.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        url = 'http://www.crummy.com/software/BeautifulSoup/bs4/doc/'
        downloader.start(url, controller)

        self.assertTrue(downloader.success)

        downloadDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(downloadDir))

        # A bare path string is always truthy, so asserting on the result of
        # os.path.join alone can never fail; os.path.exists performs the
        # actual on-disk check.
        expectedFiles = (
            'default.css',
            'pygments.css',
            'jquery.js',
            'underscore.js',
            'doctools.js',
        )
        for fname in expectedFiles:
            self.assertTrue(
                os.path.exists(os.path.join(downloadDir, fname)),
                fname)
示例#7
0
    def testDownloading_css_import_01(self):
        # Download the local example1 page and check that every listed
        # stylesheet file exists in the static directory afterwards.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        cssNames = (
            u'import1.css',
            u'import2.css',
            u'import3.css',
            u'import4.css',
            u'basic2.css',
            u'basic3.css',
            u'basic4.css',
            u'basic5.css',
            u'basic5_1.css',
            u'basic6.css',
        )
        for cssName in cssNames:
            cssPath = os.path.join(self._tempDir, self._staticDirName, cssName)
            self.assertTrue(os.path.exists(cssPath))
示例#8
0
    def testContentExample2(self):
        # Download the local example2 page and inspect
        # downloader.contentResult for the expected <img src="..."> tags.
        from webpage.downloader import Downloader, DownloadController

        imgTag = u'<img src="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pageUrl = self._path2url(
            os.path.join(u'../test/webpage/example2/', u'example2.html'))
        downloader.start(pageUrl, controller)

        # Each entry pairs the assertion to run with the image path suffix.
        checks = (
            (self.assertIn, u'/image_01.png'),
            (self.assertIn, u'/image_01_1.png'),
            (self.assertIn, u'/image_02.png'),
            (self.assertNotIn, u'/image_02_1.png'),
        )
        for check, suffix in checks:
            check(imgTag.format(path=self._staticDirName + suffix),
                  downloader.contentResult)
示例#9
0
    def run(self):
        """Download ``self._url`` and report the outcome.

        Progress is written through ``self._log`` and failures through
        ``self._error``. When ``downloader.start`` raises nothing, a
        ``FinishDownloadEvent`` carrying the content, static path, title,
        favicon and URL is posted to ``self._parentWnd``.
        """
        controller = WebPageDownloadController(
            self._runEvent,
            self._downloadDir,
            STATIC_DIR_NAME,
            self._parentWnd,
            self._timeout,
        )
        downloader = Downloader(self._timeout)

        self._log(_('Start downloading\n'))

        try:
            downloader.start(self._url, controller)
        except urllib.error.URLError as error:
            message = _('Download error: {}\n')
            self._error(message.format(str(error.reason)))
            return
        except (IOError, ValueError) as e:
            self._error(_('Invalid URL or file format\n'))
            self._error(str(e))
            return

        self._log(_('Finish downloading\n'))

        # Collect the event payload in the same order the values are read
        # from the downloader, then post it to the parent window.
        eventArgs = dict(
            content=downloader.contentResult,
            staticPath=os.path.join(self._downloadDir, STATIC_DIR_NAME),
            title=downloader.pageTitle,
            favicon=self._prepareFavicon(downloader.favicon),
            url=self._url,
        )
        wx.PostEvent(self._parentWnd,
                     webpage.events.FinishDownloadEvent(**eventArgs))
示例#10
0
    def testContentScriptExample1(self):
        # Download the local example1 page and check which
        # <script src="..."> references appear in downloader.contentResult.
        from webpage.downloader import Downloader, DownloadController

        scriptTag = '<script src="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example1/', 'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        presentSuffixes = (
            '/fname1.js',
            '/fname2.js',
            '/fname2_1.js',
            '/fname3.js',
            '/fname4.js',
        )
        for suffix in presentSuffixes:
            self.assertIn(scriptTag.format(path=self._staticDirName + suffix),
                          downloader.contentResult)

        # No reference to a second copy of fname1.js is expected.
        self.assertNotIn(
            scriptTag.format(path=self._staticDirName + '/fname1_1.js'),
            downloader.contentResult)
示例#11
0
    def testDownloading_img_02(self):
        # Download the local example2 page and check that the static
        # directory and image_01.png .. image_03.png exist afterwards.
        from webpage.downloader import Downloader, DownloadController

        pageUrl = self._path2url(
            os.path.join(u'../test/webpage/example2/', u'example2.html'))

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()
        downloader.start(pageUrl, controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        imagePaths = [
            os.path.join(staticDir, imageName)
            for imageName in (u'image_01.png', u'image_02.png',
                              u'image_03.png')
        ]

        self.assertTrue(os.path.exists(staticDir))
        for imagePath in imagePaths:
            self.assertTrue(os.path.exists(imagePath))
示例#12
0
    def testDownloading_css_03(self):
        # Download a local page whose directory and file names contain
        # Cyrillic characters and a space, then check that the static
        # directory and the listed stylesheet files exist.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/Пример 3/', 'пример 3.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        cssPaths = [
            os.path.join(staticDir, cssName)
            for cssName in ('fname1.css', 'fname2.css', 'fname3.css',
                            'fname4.css', 'fname1_1.css')
        ]

        self.assertTrue(os.path.exists(staticDir))
        for cssPath in cssPaths:
            self.assertTrue(os.path.exists(cssPath))
示例#13
0
    def testDownloading_img_02(self):
        # After downloading the local example2 page, the static directory
        # and the three image files must exist on disk.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example2/', u'example2.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(staticDir))

        for imageName in (u'image_01.png', u'image_02.png', u'image_03.png'):
            self.assertTrue(os.path.exists(os.path.join(staticDir, imageName)))
示例#14
0
    def testDownloading_stackoverflow_2(self):
        # Network test: the URL contains unescaped Cyrillic characters;
        # the downloader is expected to report success for it.
        from webpage.downloader import Downloader, DownloadController

        pageUrl = 'https://ru.stackoverflow.com/questions/241337/Как-обработать-кириллические-символы-в-urllib-request-urlopen'

        controller = DownloadController(self._tempDir, self._staticDirName)
        pageDownloader = Downloader()
        pageDownloader.start(pageUrl, controller)

        self.assertTrue(pageDownloader.success)
示例#15
0
    def testDownloading_stackoverflow_01(self):
        # Network test: the URL path is percent-encoded; the downloader is
        # expected to report success for it.
        from webpage.downloader import Downloader, DownloadController

        pageUrl = 'http://ru.stackoverflow.com/questions/476918/django-%D0%97%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B5-%D0%B2-%D0%B7%D0%B0%D0%B2%D0%B8%D1%81%D0%B8%D0%BC%D0%BE%D1%81%D1%82%D0%B8-%D0%BE%D1%82-%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B9-%D0%B2-%D0%91%D0%94'

        controller = DownloadController(self._tempDir, self._staticDirName)
        pageDownloader = Downloader()
        pageDownloader.start(pageUrl, controller)

        self.assertTrue(pageDownloader.success)
示例#16
0
    def testDownloading_stackoverflow_2(self):
        # Network test: fetch a page addressed by a URL with raw Cyrillic
        # characters in its path and require downloader.success to be true.
        from webpage.downloader import Downloader, DownloadController

        targetUrl = 'https://ru.stackoverflow.com/questions/241337/Как-обработать-кириллические-символы-в-urllib-request-urlopen'

        downloadController = DownloadController(self._tempDir,
                                                self._staticDirName)
        fetcher = Downloader()
        fetcher.start(targetUrl, downloadController)

        self.assertTrue(fetcher.success)
示例#17
0
    def testDownloading_stackoverflow_01(self):
        # Network test: fetch a page addressed by a percent-encoded URL and
        # require downloader.success to be true.
        from webpage.downloader import Downloader, DownloadController

        targetUrl = 'http://ru.stackoverflow.com/questions/476918/django-%D0%97%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B5-%D0%B2-%D0%B7%D0%B0%D0%B2%D0%B8%D1%81%D0%B8%D0%BC%D0%BE%D1%81%D1%82%D0%B8-%D0%BE%D1%82-%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B9-%D0%B2-%D0%91%D0%94'

        downloadController = DownloadController(self._tempDir,
                                                self._staticDirName)
        fetcher = Downloader()
        fetcher.start(targetUrl, downloadController)

        self.assertTrue(fetcher.success)
示例#18
0
    def testDownloading_toster(self):
        # Network test: download a toster.ru question page and expect the
        # downloader to report success.
        from webpage.downloader import Downloader, DownloadController

        pageUrl = 'https://toster.ru/q/273244'

        controller = DownloadController(self._tempDir, self._staticDirName)
        pageDownloader = Downloader()
        pageDownloader.start(pageUrl, controller)

        self.assertTrue(pageDownloader.success)
示例#19
0
    def testDownloading_toster(self):
        # Network test: fetch https://toster.ru/q/273244 and require
        # downloader.success to be true.
        from webpage.downloader import Downloader, DownloadController

        targetUrl = 'https://toster.ru/q/273244'

        downloadController = DownloadController(self._tempDir,
                                                self._staticDirName)
        fetcher = Downloader()
        fetcher.start(targetUrl, downloadController)

        self.assertTrue(fetcher.success)
示例#20
0
    def testNoTitle(self):
        # Download the local example_no_title page: the download must
        # succeed and downloader.pageTitle must be None.
        from webpage.downloader import Downloader, DownloadController

        pagePath = os.path.join(u'../test/webpage/example_no_title/',
                                u'example_no_title.html')

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()
        downloader.start(self._path2url(pagePath), controller)

        self.assertTrue(downloader.success)
        self.assertIsNone(downloader.pageTitle)
示例#21
0
    def testNoTitle(self):
        # For the local example_no_title page the downloader must report
        # success while leaving pageTitle as None.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        pageDownloader = Downloader()

        pageUrl = self._path2url(
            os.path.join(u'../test/webpage/example_no_title/',
                         u'example_no_title.html'))
        pageDownloader.start(pageUrl, controller)

        self.assertTrue(pageDownloader.success)
        self.assertIsNone(pageDownloader.pageTitle)
示例#22
0
    def testTitleExample1(self):
        # Download the local example1 page: the download must succeed and
        # downloader.pageTitle must equal the expected Cyrillic title.
        from webpage.downloader import Downloader, DownloadController

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()
        downloader.start(self._path2url(pagePath), controller)

        self.assertTrue(downloader.success)
        self.assertEqual(downloader.pageTitle, u'Заголовок страницы')
示例#23
0
    def testTitleExample1(self):
        # For the local example1 page the downloader must report success
        # and expose the expected Cyrillic title through pageTitle.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        pageDownloader = Downloader()

        pageUrl = self._path2url(
            os.path.join(u'../test/webpage/example1/', u'example1.html'))
        pageDownloader.start(pageUrl, controller)

        self.assertTrue(pageDownloader.success)
        self.assertEqual(pageDownloader.pageTitle, u'Заголовок страницы')
示例#24
0
    def testDownloading_img_srcset_content(self):
        # Download the local example3 page and check that the resulting
        # content contains the expected srcset attribute, with every image
        # path prefixed by the static directory name.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        examplePath = '../test/webpage/example3/'
        exampleHtmlPath = os.path.join(examplePath, 'example3.html')

        downloader.start(self._path2url(exampleHtmlPath), controller)
        content = downloader.contentResult

        # The same {path} placeholder is substituted three times.
        sample = 'srcset="{path}/image_02.png 2x, {path}/image_03.png w600, {path}/image_04.png"'.format(path=self._staticDirName)

        self.assertIn(sample, content)
示例#25
0
    def testDownloading_beautifulsoup(self):
        # Network test: download the BeautifulSoup documentation page, expect
        # the downloader to report success, and verify that the static
        # directory and the listed stylesheet/script files exist on disk.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        url = 'http://www.crummy.com/software/BeautifulSoup/bs4/doc/'
        downloader.start(url, controller)

        self.assertTrue(downloader.success)

        downloadDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(downloadDir))

        # A bare path string is always truthy, so asserting on the result of
        # os.path.join alone can never fail; os.path.exists performs the
        # actual on-disk check.
        for fname in ('default.css',
                      'pygments.css',
                      'jquery.js',
                      'underscore.js',
                      'doctools.js'):
            self.assertTrue(
                os.path.exists(os.path.join(downloadDir, fname)),
                fname)
示例#26
0
    def testDownloading_favicon_03(self):
        # Download the local example_favicon_03 page and check that
        # favicon.ico exists in the static directory and that
        # controller.favicon holds the expected path.
        from webpage.downloader import Downloader, DownloadController

        staticDir = os.path.join(self._tempDir, self._staticDirName)

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example_favicon_03/',
                                'example.html')
        downloader.start(self._path2url(pagePath), controller)

        iconPath = os.path.join(self._tempDir, self._staticDirName,
                                'favicon.ico')
        # NOTE(review): the expected value is built with a literal '/', which
        # differs from os.path.join on platforms with another separator;
        # confirm this matches how the controller builds the path.
        expectedFavicon = staticDir + '/favicon.ico'

        self.assertTrue(os.path.exists(staticDir))
        self.assertEqual(controller.favicon, expectedFavicon)
        self.assertTrue(os.path.exists(iconPath))
示例#27
0
    def testContentImgExample1(self):
        # Download the local example1 page and check which
        # <img src="..."> references appear in downloader.contentResult.
        from webpage.downloader import Downloader, DownloadController

        imgTag = '<img src="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example1/', 'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        # Each entry pairs the assertion to run with the image path suffix;
        # the order matches the order in which the checks are performed.
        checks = (
            (self.assertIn, '/image_01.png'),
            (self.assertIn, '/картинка.png'),
            (self.assertIn, '/image_01_1.png'),
            (self.assertIn, '/image_02.png'),
            (self.assertNotIn, '/image_02_1.png'),
            (self.assertIn, '/image_03.png'),
            (self.assertNotIn, '/image_03_1.png'),
        )
        for check, suffix in checks:
            check(imgTag.format(path=self._staticDirName + suffix),
                  downloader.contentResult)
示例#28
0
    def testDownloading_css_back_img_01(self):
        # Download the local example1 page and check that
        # back_img_01.png .. back_img_06.png exist in the static directory.
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        imageNames = (
            u'back_img_01.png',
            u'back_img_02.png',
            u'back_img_03.png',
            u'back_img_04.png',
            u'back_img_05.png',
            u'back_img_06.png',
        )
        for imageName in imageNames:
            imagePath = os.path.join(self._tempDir, self._staticDirName,
                                     imageName)
            self.assertTrue(os.path.exists(imagePath))
示例#29
0
    def testDownloading_css_url_02(self):
        # Download the local example1 page, read fname2.css from the static
        # directory and check the url("...") references and file names that
        # its text must contain.
        from webpage.downloader import Downloader, DownloadController

        urlRef = u'url("{url}")'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        cssPath = os.path.join(self._tempDir, self._staticDirName,
                               u'fname2.css')
        cssText = readTextFile(cssPath)

        for cssName in (u'basic2.css', u'basic4.css',
                        u'basic5.css', u'basic6.css'):
            self.assertIn(urlRef.format(url=cssName), cssText)

        # These names are only checked as plain substrings.
        for plainName in ('basic3.css', 'basic5.css'):
            self.assertIn(plainName, cssText)
示例#30
0
    def testDownloading_css_03(self):
        # Download a local page whose directory and file names contain
        # Cyrillic characters and a space, then check that the static
        # directory and the listed stylesheet files exist.
        from webpage.downloader import Downloader, DownloadController

        pageUrl = self._path2url(
            os.path.join('../test/webpage/Пример 3/', 'пример 3.html'))

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()
        downloader.start(pageUrl, controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(staticDir))

        for cssName in ('fname1.css', 'fname2.css', 'fname3.css',
                        'fname4.css', 'fname1_1.css'):
            self.assertTrue(os.path.exists(os.path.join(staticDir, cssName)))
示例#31
0
    def testDownloading_css_rename(self):
        # Download the local example_css_rename page and check that
        # style.php.css exists in the static directory and is referenced
        # through href="..." in downloader.contentResult.
        from webpage.downloader import Downloader, DownloadController

        hrefAttr = 'href="{path}"'
        staticDir = os.path.join(self._tempDir, self._staticDirName)

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('../test/webpage/example_css_rename/',
                                'example.html')
        downloader.start(self._path2url(pagePath), controller)

        cssPath = os.path.join(self._tempDir, self._staticDirName,
                               'style.php.css')

        for requiredPath in (staticDir, cssPath):
            self.assertTrue(os.path.exists(requiredPath))

        expectedHref = hrefAttr.format(
            path=self._staticDirName + '/style.php.css')
        self.assertIn(expectedHref, downloader.contentResult)
示例#32
0
    def testDownloading_css_url_02(self):
        # After downloading the local example1 page, the text of fname2.css
        # in the static directory must contain the listed url("...")
        # references and plain file names.
        from webpage.downloader import Downloader, DownloadController

        urlRef = u'url("{url}")'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pageUrl = self._path2url(
            os.path.join(u'../test/webpage/example1/', u'example1.html'))
        downloader.start(pageUrl, controller)

        cssText = readTextFile(
            os.path.join(self._tempDir, self._staticDirName, u'fname2.css'))

        expectedFragments = [
            urlRef.format(url=cssName)
            for cssName in (u'basic2.css', u'basic4.css',
                            u'basic5.css', u'basic6.css')
        ]
        # The last two names are only checked as plain substrings.
        expectedFragments.extend(['basic3.css', 'basic5.css'])

        for fragment in expectedFragments:
            self.assertIn(fragment, cssText)
示例#33
0
    def testDownloading_css_url_01(self):
        # Download the local example1 page, read fname1.css from the static
        # directory and check that its text contains a url("...") reference
        # for the imported stylesheet and each background image.
        from webpage.downloader import Downloader, DownloadController

        urlRef = u'url("{url}")'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        cssPath = os.path.join(self._tempDir, self._staticDirName,
                               u'fname1.css')
        cssText = readTextFile(cssPath)

        referencedNames = (
            u'import1.css',
            u'back_img_01.png',
            u'back_img_02.png',
            u'back_img_03.png',
            u'back_img_04.png',
            u'back_img_05.png',
            u'back_img_06.png',
        )
        for name in referencedNames:
            self.assertIn(urlRef.format(url=name), cssText)
示例#34
0
    def testDownloading_img_urlquote(self):
        # Download the local example_urlquote page and check that the image
        # with a Cyrillic file name exists in the static directory and is
        # referenced by an <img src="..."> tag in downloader.contentResult.
        from webpage.downloader import Downloader, DownloadController

        imgTag = '<img src="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example_urlquote/',
                                'example_urlquote.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        imagePath = os.path.join(self._tempDir, self._staticDirName,
                                 'рисунок.png')

        for requiredPath in (staticDir, imagePath):
            self.assertTrue(os.path.exists(requiredPath))

        expectedTag = imgTag.format(path=self._staticDirName + '/рисунок.png')
        self.assertIn(expectedTag, downloader.contentResult)
示例#35
0
    def run(self):
        """Download ``self._url`` and report the outcome.

        Failures are reported through ``self._error``: a ``urllib2.URLError``
        is reported with its reason, while an ``IOError`` or ``ValueError`` is
        reported as an invalid URL or file format followed by the exception
        text. On success a ``FinishDownloadEvent`` carrying the content,
        static path, title, favicon and URL is posted to ``self._parentWnd``.
        """
        controller = WebPageDownloadController(self._runEvent,
                                               self._downloadDir,
                                               STATIC_DIR_NAME,
                                               self._parentWnd,
                                               self._timeout)
        downloader = Downloader(self._timeout)

        self._log(_(u'Start downloading\n'))

        try:
            downloader.start(self._url, controller)
        except urllib2.URLError as urlError:
            # Keep this clause ahead of the IOError one: URLError is an
            # IOError subclass and needs its own message.
            message = _(u'Download error: {}\n').format(
                unicode(urlError.reason))
            self._error(message)
            return
        except (IOError, ValueError) as otherError:
            self._error(_(u'Invalid URL or file format\n'))
            self._error(unicode(otherError))
            return

        # Reached only when the download raised nothing.
        self._log(_(u'Finish downloading\n'))

        finishEvent = webpage.events.FinishDownloadEvent(
            content=downloader.contentResult,
            staticPath=os.path.join(self._downloadDir, STATIC_DIR_NAME),
            title=downloader.pageTitle,
            favicon=self._prepareFavicon(downloader.favicon),
            url=self._url)
        wx.PostEvent(self._parentWnd, finishEvent)
示例#36
0
    def testContentScriptExample1(self):
        """Check the ``<script src=`` references in the example1 content.

        Downloads example1.html and asserts that the resulting content
        references fname1.js, fname2.js, fname2_1.js, fname3.js and fname4.js
        under the static directory name, and does not reference fname1_1.js.
        """
        from webpage.downloader import Downloader, DownloadController

        template = '<script src="{path}"'
        expectedSuffixes = (
            '/fname1.js',
            '/fname2.js',
            '/fname2_1.js',
            '/fname3.js',
            '/fname4.js',
        )

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        exampleHtmlPath = os.path.join('../test/webpage/example1/',
                                       'example1.html')
        downloader.start(self._path2url(exampleHtmlPath), controller)

        for suffix in expectedSuffixes:
            expectedTag = template.format(path=self._staticDirName + suffix)
            self.assertIn(expectedTag, downloader.contentResult)

        unexpectedTag = template.format(
            path=self._staticDirName + '/fname1_1.js')
        self.assertNotIn(unexpectedTag, downloader.contentResult)
示例#37
0
    def testDownloading_css_url_01 (self):
        """Verify the ``url("...")`` entries present in the saved fname1.css.

        After downloading example1.html, the text of fname1.css in the static
        directory must contain a ``url("...")`` entry for import1.css and for
        back_img_01.png through back_img_06.png.
        """
        from webpage.downloader import Downloader, DownloadController

        template = u'url("{url}")'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        htmlPath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(htmlPath), controller)

        savedCssPath = os.path.join(self._tempDir,
                                    self._staticDirName,
                                    u'fname1.css')
        savedCss = readTextFile(savedCssPath)

        referencedFiles = [
            u'import1.css',
            u'back_img_01.png',
            u'back_img_02.png',
            u'back_img_03.png',
            u'back_img_04.png',
            u'back_img_05.png',
            u'back_img_06.png',
        ]

        for fileName in referencedFiles:
            self.assertIn(template.format(url=fileName), savedCss)
示例#38
0
    def testContentCSSExample1_01(self):
        """Check the ``<link href=`` references in the example1 content.

        Downloads example1.html and asserts that the resulting content
        references fname1.css, fname2.css, fname3.css, fname4.css and
        fname1_1.css under the static directory name, and does not reference
        fname2_1.css.
        """
        from webpage.downloader import Downloader, DownloadController

        template = u'<link href="{path}"'
        expectedSuffixes = (
            u'/fname1.css',
            u'/fname2.css',
            u'/fname3.css',
            u'/fname4.css',
            u'/fname1_1.css',
        )

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        exampleHtmlPath = os.path.join(u'../test/webpage/example1/',
                                       u'example1.html')
        downloader.start(self._path2url(exampleHtmlPath), controller)

        for suffix in expectedSuffixes:
            expectedLink = template.format(path=self._staticDirName + suffix)
            self.assertIn(expectedLink, downloader.contentResult)

        unexpectedLink = template.format(
            path=self._staticDirName + u'/fname2_1.css')
        self.assertNotIn(unexpectedLink, downloader.contentResult)
示例#39
0
    def testContentCSSExample1_01 (self):
        """Verify which stylesheets the example1 content links to.

        The content produced from example1.html must contain ``<link href=``
        entries for fname1.css, fname2.css, fname3.css, fname4.css and
        fname1_1.css under the static directory name, and must not contain
        one for fname2_1.css.
        """
        from webpage.downloader import Downloader, DownloadController

        template = u'<link href="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        htmlPath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(htmlPath), controller)

        linkedSuffixes = [
            u'/fname1.css',
            u'/fname2.css',
            u'/fname3.css',
            u'/fname4.css',
            u'/fname1_1.css',
        ]

        for linkedSuffix in linkedSuffixes:
            self.assertIn(
                template.format(path=self._staticDirName + linkedSuffix),
                downloader.contentResult)

        self.assertNotIn(
            template.format(path=self._staticDirName + u'/fname2_1.css'),
            downloader.contentResult)
示例#40
0
    def testDownloading_css_import_01 (self):
        """Check that the expected CSS files exist after the download.

        Downloads example1.html and asserts that each of the import*.css and
        basic*.css files listed below exists in the static directory inside
        the temporary directory.
        """
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        exampleHtmlPath = os.path.join(u'../test/webpage/example1/',
                                       u'example1.html')
        downloader.start(self._path2url(exampleHtmlPath), controller)

        expectedFiles = (
            u'import1.css',
            u'import2.css',
            u'import3.css',
            u'import4.css',
            u'basic2.css',
            u'basic3.css',
            u'basic4.css',
            u'basic5.css',
            u'basic5_1.css',
            u'basic6.css',
        )

        # Checked in the listed order; the first missing file fails the test.
        for fileName in expectedFiles:
            filePath = os.path.join(self._tempDir,
                                    self._staticDirName,
                                    fileName)
            self.assertTrue(os.path.exists(filePath))
示例#41
0
    def testDownloading_css_back_img_01 (self):
        """Check that the expected background images exist after the download.

        Downloads example1.html and asserts that back_img_01.png through
        back_img_06.png exist in the static directory inside the temporary
        directory.
        """
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        exampleHtmlPath = os.path.join(u'../test/webpage/example1/',
                                       u'example1.html')
        downloader.start(self._path2url(exampleHtmlPath), controller)

        expectedImages = (
            u'back_img_01.png',
            u'back_img_02.png',
            u'back_img_03.png',
            u'back_img_04.png',
            u'back_img_05.png',
            u'back_img_06.png',
        )

        for imageName in expectedImages:
            imagePath = os.path.join(self._tempDir,
                                     self._staticDirName,
                                     imageName)
            self.assertTrue(os.path.exists(imagePath))