Exemplo n.º 1
0
    def test_xhtml_converter(self):
        """Run HTMLConverter over XHTML_TEXT and check the links in the output.

        Six example.com URLs are registered in a URLTable. The stylesheet is
        marked done with link type "css"; image.png, tubes.html and
        ferret.jpg are marked done with a filename relative to the test's
        temporary directory, and empty files with those names are created
        there. cat.jpg and fox.jpg are registered but never updated, and
        lol.html is never registered at all.

        The assertions expect the three saved resources to appear by bare
        filename and the remaining ones to stay absolute URLs in the
        converted document.
        """
        url_table = URLTable()
        base_dir = self.temp_dir.name

        # (URL, local path) for every resource treated as already saved.
        saved_resources = (
            ("http://example.com/image.png", os.path.join(base_dir, "image.png")),
            ("http://example.com/tubes.html", os.path.join(base_dir, "tubes.html")),
            ("http://example.com/ferret.jpg", os.path.join(base_dir, "ferret.jpg")),
        )
        registered_urls = (
            "http://example.com/styles.css",
            "http://example.com/image.png",
            "http://example.com/cat.jpg",
            "http://example.com/fox.jpg",
            "http://example.com/ferret.jpg",
            "http://example.com/tubes.html",
        )

        url_table.add_many([AddURLInfo(url, None, None) for url in registered_urls])
        url_table.update_one("http://example.com/styles.css", status=Status.done.value, link_type="css")
        for url, local_path in saved_resources:
            url_table.update_one(
                url,
                status=Status.done.value,
                filename=os.path.relpath(local_path, base_dir),
            )

        source_path = os.path.join(base_dir, "index.html")
        target_path = os.path.join(base_dir, "index.html-new")

        with open(source_path, "w") as source_file:
            source_file.write(XHTML_TEXT)

        # The saved resources only need to exist on disk; their content is unused here.
        for _, local_path in saved_resources:
            open(local_path, "wb").close()

        css_scraper = CSSScraper()
        walker = ElementWalker(css_scraper=css_scraper)
        parser = self.get_html_parser()
        converter = HTMLConverter(parser, walker, url_table)

        converter.convert(source_path, target_path, base_url="http://example.com/index.html")

        with open(target_path, "r") as target_file:
            output = target_file.read()

        expected_fragments = (
            "url('image.png')",
            "url('http://example.com/cat.jpg')",
            '"tubes.html"',
            '"http://example.com/lol.html"',
            "url('http://example.com/fox.jpg')",
            "url('ferret.jpg')",
            "hello world!!",
            "<hr/>",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, output)
Exemplo n.º 2
0
    def test_xhtml_converter(self):
        """Convert an XHTML page and verify which references were rewritten.

        The URL table holds six example.com entries. styles.css is flagged
        done as a 'css' link; image.png, tubes.html and ferret.jpg are
        flagged done with filenames relative to the temporary directory and
        get empty placeholder files on disk. cat.jpg and fox.jpg are only
        registered. The output is expected to name the saved files by bare
        filename, keep absolute URLs for everything else (including the
        unregistered lol.html), and still contain the 'hello world!!' text
        and a '<hr/>' element.
        """
        def in_temp_dir(name):
            # Absolute path of *name* inside the test's temporary directory.
            return os.path.join(self.temp_dir.name, name)

        def relative_name(path):
            # Path of *path* relative to the temporary directory.
            return os.path.relpath(path, self.temp_dir.name)

        url_table = URLTable()

        image_path = in_temp_dir('image.png')
        tubes_path = in_temp_dir('tubes.html')
        ferret_path = in_temp_dir('ferret.jpg')
        placeholder_paths = [image_path, tubes_path, ferret_path]

        entries = []
        for url in ('http://example.com/styles.css',
                    'http://example.com/image.png',
                    'http://example.com/cat.jpg',
                    'http://example.com/fox.jpg',
                    'http://example.com/ferret.jpg',
                    'http://example.com/tubes.html'):
            entries.append(AddURLInfo(url, None, None))
        url_table.add_many(entries)

        url_table.update_one('http://example.com/styles.css',
                             status=Status.done.value, link_type='css')
        saved = (
            ('http://example.com/image.png', image_path),
            ('http://example.com/tubes.html', tubes_path),
            ('http://example.com/ferret.jpg', ferret_path),
        )
        for url, path in saved:
            url_table.update_one(url, status=Status.done.value,
                                 filename=relative_name(path))

        input_path = in_temp_dir('index.html')
        output_path = in_temp_dir('index.html-new')

        with open(input_path, 'w') as handle:
            handle.write(XHTML_TEXT)

        # Create zero-byte stand-ins for the resources marked as saved.
        for path in placeholder_paths:
            open(path, 'wb').close()

        scraper = CSSScraper()
        element_walker = ElementWalker(css_scraper=scraper)
        html_parser = self.get_html_parser()
        converter = HTMLConverter(html_parser, element_walker, url_table)

        converter.convert(input_path, output_path,
                          base_url='http://example.com/index.html')

        with open(output_path, 'r') as handle:
            document = handle.read()

        for needle in ("url('image.png')",
                       "url('http://example.com/cat.jpg')",
                       '"tubes.html"',
                       '"http://example.com/lol.html"',
                       "url('http://example.com/fox.jpg')",
                       "url('ferret.jpg')",
                       "hello world!!",
                       "<hr/>"):
            self.assertIn(needle, document)
Exemplo n.º 3
0
    def test_html_converter(self):
        """Convert HTML_TEXT with HTMLConverter and check the emitted links.

        Six example.com URLs are added to a URLTable. styles.css is marked
        done with link type 'css'; image.png, tubes.html and ferret.jpg are
        marked done with no filename recorded. The converter is built from a
        PathNamer rooted at a fresh temporary directory plus the table
        (presumably the namer supplies the local names -- confirm against
        PathNamer). The converted file is expected to reference the three
        done resources by bare filename and everything else by absolute URL.
        """
        known_urls = [
            'http://example.com/styles.css',
            'http://example.com/image.png',
            'http://example.com/cat.jpg',
            'http://example.com/fox.jpg',
            'http://example.com/ferret.jpg',
            'http://example.com/tubes.html',
        ]
        done_urls = (
            'http://example.com/image.png',
            'http://example.com/tubes.html',
            'http://example.com/ferret.jpg',
        )
        expected_fragments = (
            "url('image.png')",
            "url('http://example.com/cat.jpg')",
            '"tubes.html"',
            '"http://example.com/lol.html"',
            "url('http://example.com/fox.jpg')",
            "url('ferret.jpg')",
        )

        with TemporaryDirectory() as temp_dir:
            path_namer = PathNamer(temp_dir)
            url_table = URLTable()

            url_table.add(known_urls)
            url_table.update('http://example.com/styles.css',
                             status=Status.done, link_type='css')
            for url in done_urls:
                url_table.update(url, status=Status.done)

            source_path = os.path.join(temp_dir, 'index.html')
            target_path = os.path.join(temp_dir, 'index.html-new')

            with open(source_path, 'w') as source_file:
                source_file.write(HTML_TEXT)

            converter = HTMLConverter(path_namer, url_table)
            converter.convert(source_path, target_path,
                              base_url='http://example.com/index.html')

            with open(target_path, 'r') as target_file:
                output = target_file.read()

            for fragment in expected_fragments:
                self.assertIn(fragment, output)
Exemplo n.º 4
0
    def test_html_converter(self):
        """Check link rewriting when HTML_TEXT goes through HTMLConverter.

        Inside a temporary directory the test registers six example.com
        URLs, marks styles.css done as a 'css' link, and marks image.png,
        tubes.html and ferret.jpg done without a filename. HTML_TEXT is
        written to index.html and converted to index.html-new with
        http://example.com/index.html as the base URL. The assertions look
        for bare filenames for the three done resources and absolute URLs
        for cat.jpg, fox.jpg and lol.html.
        """
        with TemporaryDirectory() as temp_dir:
            path_namer = PathNamer(temp_dir)
            url_table = URLTable()

            url_table.add([
                'http://example.com/styles.css',
                'http://example.com/image.png',
                'http://example.com/cat.jpg',
                'http://example.com/fox.jpg',
                'http://example.com/ferret.jpg',
                'http://example.com/tubes.html',
            ])

            # Keyword arguments shared by every "mark as done" update below.
            done = {'status': Status.done}
            url_table.update('http://example.com/styles.css',
                             link_type='css', **done)
            url_table.update('http://example.com/image.png', **done)
            url_table.update('http://example.com/tubes.html', **done)
            url_table.update('http://example.com/ferret.jpg', **done)

            input_path, output_path = (
                os.path.join(temp_dir, name)
                for name in ('index.html', 'index.html-new')
            )

            with open(input_path, 'w') as handle:
                handle.write(HTML_TEXT)

            HTMLConverter(path_namer, url_table).convert(
                input_path, output_path,
                base_url='http://example.com/index.html')

            with open(output_path, 'r') as handle:
                document = handle.read()

            expectations = [
                "url('image.png')",
                "url('http://example.com/cat.jpg')",
                '"tubes.html"',
                '"http://example.com/lol.html"',
                "url('http://example.com/fox.jpg')",
                "url('ferret.jpg')",
            ]
            for needle in expectations:
                self.assertIn(needle, document)
Exemplo n.º 5
0
    def test_xhtml_converter(self):
        """Convert XHTML_TEXT and assert on the links found in the result.

        Six example.com URLs are added to the table as {'url': ...} rows.
        styles.css is marked done with link type 'css'; image.png,
        tubes.html and ferret.jpg are marked done with a filename relative
        to the test's temporary directory, and matching empty files are
        created. The converted document is expected to use bare filenames
        for those three, keep absolute URLs for cat.jpg, fox.jpg and
        lol.html, and still contain 'hello world!!' and '<hr/>'.
        """
        url_table = URLTable()
        temp_root = self.temp_dir.name

        saved_names = ('image.png', 'tubes.html', 'ferret.jpg')
        saved_paths = [os.path.join(temp_root, name) for name in saved_names]
        saved_urls = (
            'http://example.com/image.png',
            'http://example.com/tubes.html',
            'http://example.com/ferret.jpg',
        )

        rows = [
            {'url': url}
            for url in (
                'http://example.com/styles.css',
                'http://example.com/image.png',
                'http://example.com/cat.jpg',
                'http://example.com/fox.jpg',
                'http://example.com/ferret.jpg',
                'http://example.com/tubes.html',
            )
        ]
        url_table.add_many(rows)

        url_table.update_one(
            'http://example.com/styles.css',
            status=Status.done, link_type='css')
        for url, path in zip(saved_urls, saved_paths):
            url_table.update_one(
                url,
                status=Status.done,
                filename=os.path.relpath(path, temp_root))

        source_path = os.path.join(temp_root, 'index.html')
        target_path = os.path.join(temp_root, 'index.html-new')

        with open(source_path, 'w') as source_file:
            source_file.write(XHTML_TEXT)

        # Touch the saved resources so they exist on disk as empty files.
        for path in saved_paths:
            open(path, 'wb').close()

        css_scraper = CSSScraper()
        element_walker = ElementWalker(css_scraper=css_scraper)
        html_parser = self.get_html_parser()
        converter = HTMLConverter(html_parser, element_walker, url_table)

        converter.convert(
            source_path, target_path,
            base_url='http://example.com/index.html')

        with open(target_path, 'r') as target_file:
            output = target_file.read()

        for fragment in (
            "url('image.png')",
            "url('http://example.com/cat.jpg')",
            '"tubes.html"',
            '"http://example.com/lol.html"',
            "url('http://example.com/fox.jpg')",
            "url('ferret.jpg')",
            "hello world!!",
            "<hr/>",
        ):
            self.assertIn(fragment, output)
Exemplo n.º 6
0
    def test_xhtml_converter(self):
        """Convert XHTML_TEXT inside cd_tempdir() and check the output links.

        Within the directory yielded by cd_tempdir() (presumably also made
        the working directory -- confirm against the helper), six
        example.com URLs are added to a URLTable. styles.css is marked done
        as a 'css' link; image.png, tubes.html and ferret.jpg are marked
        done with a filename relative to that directory and are created as
        empty files. HTMLConverter is built from the table alone. The
        output is expected to reference the saved files by bare filename,
        keep absolute URLs for the rest, and retain 'hello world!!' and
        '<hr/>'.
        """
        with cd_tempdir() as temp_dir:
            url_table = URLTable()

            def mark_done(url, **extra):
                # Flag *url* as done, forwarding any additional fields.
                url_table.update(url, status=Status.done, **extra)

            # (URL, local path) for each resource treated as already saved.
            saved = [
                ('http://example.com/image.png',
                 os.path.join(temp_dir, 'image.png')),
                ('http://example.com/tubes.html',
                 os.path.join(temp_dir, 'tubes.html')),
                ('http://example.com/ferret.jpg',
                 os.path.join(temp_dir, 'ferret.jpg')),
            ]

            url_table.add([
                'http://example.com/styles.css',
                'http://example.com/image.png',
                'http://example.com/cat.jpg',
                'http://example.com/fox.jpg',
                'http://example.com/ferret.jpg',
                'http://example.com/tubes.html',
            ])
            mark_done('http://example.com/styles.css', link_type='css')
            for url, path in saved:
                mark_done(url, filename=os.path.relpath(path, temp_dir))

            input_path = os.path.join(temp_dir, 'index.html')
            output_path = os.path.join(temp_dir, 'index.html-new')

            with open(input_path, 'w') as handle:
                handle.write(XHTML_TEXT)

            for _, path in saved:
                open(path, 'wb').close()

            converter = HTMLConverter(url_table)
            converter.convert(input_path, output_path,
                              base_url='http://example.com/index.html')

            with open(output_path, 'r') as handle:
                document = handle.read()

            expectations = (
                "url('image.png')",
                "url('http://example.com/cat.jpg')",
                '"tubes.html"',
                '"http://example.com/lol.html"',
                "url('http://example.com/fox.jpg')",
                "url('ferret.jpg')",
                "hello world!!",
                "<hr/>",
            )
            for needle in expectations:
                self.assertIn(needle, document)