Exemplo n.º 1
0
    def test_xhtml_converter(self):
        """Run ``HTMLConverter`` over an XHTML document and check the output.

        Six URLs are registered in a ``URLTable``.  The stylesheet is marked
        done with ``link_type='css'``; ``image.png``, ``tubes.html`` and
        ``ferret.jpg`` are marked done with a filename relative to the
        temporary directory, and empty files with those names are created
        there.  ``cat.jpg`` and ``fox.jpg`` are registered but never updated.

        After conversion the assertions expect the three URLs with a
        recorded filename to appear as relative references, the other URLs
        (including ``lol.html``, which is never added to the table) to stay
        absolute, and the ``hello world!!`` text and ``<hr/>`` element to
        be present in the output.
        """
        work_dir = self.temp_dir.name
        table = URLTable()

        # (url, absolute local path) for every resource that has a file on
        # disk; the order matches both the table updates and file creation.
        saved_resources = (
            ('http://example.com/image.png',
             os.path.join(work_dir, 'image.png')),
            ('http://example.com/tubes.html',
             os.path.join(work_dir, 'tubes.html')),
            ('http://example.com/ferret.jpg',
             os.path.join(work_dir, 'ferret.jpg')),
        )

        known_urls = (
            'http://example.com/styles.css',
            'http://example.com/image.png',
            'http://example.com/cat.jpg',
            'http://example.com/fox.jpg',
            'http://example.com/ferret.jpg',
            'http://example.com/tubes.html',
        )
        table.add_many([AddURLInfo(url, None, None) for url in known_urls])

        table.update_one('http://example.com/styles.css',
                         status=Status.done.value,
                         link_type='css')
        for url, local_path in saved_resources:
            table.update_one(url,
                             status=Status.done.value,
                             filename=os.path.relpath(local_path, work_dir))

        source_path = os.path.join(work_dir, 'index.html')
        target_path = os.path.join(work_dir, 'index.html-new')

        with open(source_path, 'w') as source_file:
            source_file.write(XHTML_TEXT)

        # The linked resources only need to exist; their content is unused.
        for _, local_path in saved_resources:
            open(local_path, 'wb').close()

        walker = ElementWalker(css_scraper=CSSScraper())
        converter = HTMLConverter(self.get_html_parser(), walker, table)

        converter.convert(source_path,
                          target_path,
                          base_url='http://example.com/index.html')

        with open(target_path, 'r') as target_file:
            output = target_file.read()

        expected_fragments = (
            "url('image.png')",
            "url('http://example.com/cat.jpg')",
            '"tubes.html"',
            '"http://example.com/lol.html"',
            "url('http://example.com/fox.jpg')",
            "url('ferret.jpg')",
            "hello world!!",
            "<hr/>",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, output)
Exemplo n.º 2
0
    def test_html_converter(self):
        """Run ``HTMLConverter`` over an HTML document and check the output.

        Six URLs are added to a ``URLTable``.  The stylesheet is marked done
        with ``link_type='css'``; ``image.png``, ``tubes.html`` and
        ``ferret.jpg`` are marked done without further details.  ``cat.jpg``
        and ``fox.jpg`` are added but never updated.

        The converter is built from a ``PathNamer`` rooted at a fresh
        temporary directory.  After conversion the assertions expect the
        three plainly-done URLs to appear as relative references and the
        other URLs (including ``lol.html``, which is never added to the
        table) to stay absolute.
        """
        with TemporaryDirectory() as work_dir:
            namer = PathNamer(work_dir)
            table = URLTable()

            table.add([
                'http://example.com/styles.css',
                'http://example.com/image.png',
                'http://example.com/cat.jpg',
                'http://example.com/fox.jpg',
                'http://example.com/ferret.jpg',
                'http://example.com/tubes.html',
            ])

            table.update('http://example.com/styles.css',
                         status=Status.done,
                         link_type='css')
            finished_urls = (
                'http://example.com/image.png',
                'http://example.com/tubes.html',
                'http://example.com/ferret.jpg',
            )
            for url in finished_urls:
                table.update(url, status=Status.done)

            source_path = os.path.join(work_dir, 'index.html')
            target_path = os.path.join(work_dir, 'index.html-new')

            with open(source_path, 'w') as source_file:
                source_file.write(HTML_TEXT)

            converter = HTMLConverter(namer, table)
            converter.convert(source_path,
                              target_path,
                              base_url='http://example.com/index.html')

            with open(target_path, 'r') as target_file:
                output = target_file.read()

            expected_fragments = (
                "url('image.png')",
                "url('http://example.com/cat.jpg')",
                '"tubes.html"',
                '"http://example.com/lol.html"',
                "url('http://example.com/fox.jpg')",
                "url('ferret.jpg')",
            )
            for fragment in expected_fragments:
                self.assertIn(fragment, output)