def test_xhtml_converter(self):
    """Run HTMLConverter over XHTML_TEXT and check the converted output.

    Six URLs are registered in a URLTable; the stylesheet is marked done
    with ``link_type="css"`` and the image, tubes page and ferret picture
    are marked done with a filename relative to the temp directory.  The
    converted text must contain relative references for the files with a
    filename and absolute URLs for the remaining ones.
    """
    root = self.temp_dir.name
    table = URLTable()

    image_path = os.path.join(root, "image.png")
    tubes_path = os.path.join(root, "tubes.html")
    ferret_path = os.path.join(root, "ferret.jpg")

    registered_urls = [
        "http://example.com/styles.css",
        "http://example.com/image.png",
        "http://example.com/cat.jpg",
        "http://example.com/fox.jpg",
        "http://example.com/ferret.jpg",
        "http://example.com/tubes.html",
    ]
    table.add_many([AddURLInfo(url, None, None) for url in registered_urls])

    table.update_one("http://example.com/styles.css", status=Status.done.value, link_type="css")

    # URLs that have a local file, in the order they are marked as done.
    saved = [
        ("http://example.com/image.png", image_path),
        ("http://example.com/tubes.html", tubes_path),
        ("http://example.com/ferret.jpg", ferret_path),
    ]
    for url, path in saved:
        table.update_one(
            url,
            status=Status.done.value,
            filename=os.path.relpath(path, root),
        )

    source_path = os.path.join(root, "index.html")
    target_path = os.path.join(root, "index.html-new")

    with open(source_path, "w") as writer:
        writer.write(XHTML_TEXT)

    # Create empty placeholder files at the local paths.
    for _, path in saved:
        with open(path, "wb"):
            pass

    walker = ElementWalker(css_scraper=CSSScraper())
    html_converter = HTMLConverter(self.get_html_parser(), walker, table)
    html_converter.convert(source_path, target_path, base_url="http://example.com/index.html")

    with open(target_path, "r") as reader:
        converted_text = reader.read()

    expected_fragments = (
        "url('image.png')",
        "url('http://example.com/cat.jpg')",
        '"tubes.html"',
        '"http://example.com/lol.html"',
        "url('http://example.com/fox.jpg')",
        "url('ferret.jpg')",
        "hello world!!",
        "<hr/>",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, converted_text)
def test_xhtml_converter(self):
    """Convert an XHTML page with HTMLConverter and verify its references.

    The URLTable is filled with six AddURLInfo rows.  The stylesheet is
    marked done as a CSS link; the image, tubes page and ferret picture are
    marked done with filenames relative to the temp directory.  The output
    is then checked for the expected relative and absolute references.
    """
    work_dir = self.temp_dir.name
    url_table = URLTable()

    # name -> absolute path for every resource that has a local file.
    local_files = {
        'image.png': os.path.join(work_dir, 'image.png'),
        'tubes.html': os.path.join(work_dir, 'tubes.html'),
        'ferret.jpg': os.path.join(work_dir, 'ferret.jpg'),
    }

    rows = []
    for url in (
        'http://example.com/styles.css',
        'http://example.com/image.png',
        'http://example.com/cat.jpg',
        'http://example.com/fox.jpg',
        'http://example.com/ferret.jpg',
        'http://example.com/tubes.html',
    ):
        rows.append(AddURLInfo(url, None, None))
    url_table.add_many(rows)

    url_table.update_one(
        'http://example.com/styles.css',
        status=Status.done.value,
        link_type='css',
    )
    url_table.update_one(
        'http://example.com/image.png',
        status=Status.done.value,
        filename=os.path.relpath(local_files['image.png'], work_dir),
    )
    url_table.update_one(
        'http://example.com/tubes.html',
        status=Status.done.value,
        filename=os.path.relpath(local_files['tubes.html'], work_dir),
    )
    url_table.update_one(
        'http://example.com/ferret.jpg',
        status=Status.done.value,
        filename=os.path.relpath(local_files['ferret.jpg'], work_dir),
    )

    input_path = os.path.join(work_dir, 'index.html')
    output_path = os.path.join(work_dir, 'index.html-new')

    with open(input_path, 'w') as html_out:
        html_out.write(XHTML_TEXT)

    # Create an empty file at each local path (image, tubes, ferret order).
    for name in ('image.png', 'tubes.html', 'ferret.jpg'):
        with open(local_files[name], 'wb'):
            pass

    element_walker = ElementWalker(css_scraper=CSSScraper())
    converter = HTMLConverter(self.get_html_parser(), element_walker, url_table)
    converter.convert(input_path, output_path, base_url='http://example.com/index.html')

    with open(output_path, 'r') as html_in:
        result = html_in.read()

    for needle in [
        "url('image.png')",
        "url('http://example.com/cat.jpg')",
        '"tubes.html"',
        '"http://example.com/lol.html"',
        "url('http://example.com/fox.jpg')",
        "url('ferret.jpg')",
        "hello world!!",
        "<hr/>",
    ]:
        self.assertIn(needle, result)
def test_css_converter(self):
    """Run CSSConverter over CSS_TEXT and check the rewritten ``url()`` values.

    The stylesheet and the image are marked done with filenames relative to
    the temp directory; ``cat.jpg`` is only registered (twice).  The
    converted stylesheet must contain a relative reference to the image and
    the absolute URL of the cat picture.
    """
    root = self.temp_dir.name
    table = URLTable()

    stylesheet_path = os.path.join(root, 'styles.css')
    picture_path = os.path.join(root, 'image.png')
    converted_path = os.path.join(root, 'styles.css-new')

    # cat.jpg is listed twice, exactly as in the original fixture.
    urls = (
        'http://example.com/styles.css',
        'http://example.com/image.png',
        'http://example.com/cat.jpg',
        'http://example.com/cat.jpg',
    )
    table.add_many([{'url': url} for url in urls])

    table.update_one(
        'http://example.com/styles.css',
        status=Status.done,
        link_type='css',
        filename=os.path.relpath(stylesheet_path, root),
    )
    table.update_one(
        'http://example.com/image.png',
        status=Status.done,
        filename=os.path.relpath(picture_path, root),
    )

    with open(stylesheet_path, 'w') as writer:
        writer.write(CSS_TEXT)

    # Create an empty file at the image path.
    with open(picture_path, 'wb'):
        pass

    css_converter = CSSConverter(table)
    css_converter.convert(
        stylesheet_path,
        converted_path,
        base_url='http://example.com/styles.css',
    )

    with open(converted_path, 'r') as reader:
        converted_text = reader.read()

    for fragment in ("url('image.png')", "url('http://example.com/cat.jpg')"):
        self.assertIn(fragment, converted_text)
def test_css_converter(self):
    """Convert a stylesheet with CSSConverter and verify its ``url()`` values.

    Four AddURLInfo rows are added (``cat.jpg`` twice).  The stylesheet and
    the image are marked done with filenames relative to the temp
    directory.  The output must reference the image relatively and keep the
    absolute URL for the cat picture.
    """
    work_dir = self.temp_dir.name
    url_table = URLTable()

    css_in = os.path.join(work_dir, "styles.css")
    image_file = os.path.join(work_dir, "image.png")
    css_out = os.path.join(work_dir, "styles.css-new")

    rows = []
    for url in [
        "http://example.com/styles.css",
        "http://example.com/image.png",
        "http://example.com/cat.jpg",
        "http://example.com/cat.jpg",  # duplicate kept from the original fixture
    ]:
        rows.append(AddURLInfo(url, None, None))
    url_table.add_many(rows)

    done = Status.done.value
    url_table.update_one(
        "http://example.com/styles.css",
        status=done,
        link_type="css",
        filename=os.path.relpath(css_in, work_dir),
    )
    url_table.update_one(
        "http://example.com/image.png",
        status=done,
        filename=os.path.relpath(image_file, work_dir),
    )

    with open(css_in, "w") as css_writer:
        css_writer.write(CSS_TEXT)

    # Create an empty file at the image path.
    with open(image_file, "wb"):
        pass

    converter = CSSConverter(url_table)
    converter.convert(css_in, css_out, base_url="http://example.com/styles.css")

    with open(css_out, "r") as css_reader:
        result = css_reader.read()

    self.assertIn("url('image.png')", result)
    self.assertIn("url('http://example.com/cat.jpg')", result)
def test_xhtml_converter(self):
    """Run HTMLConverter over XHTML_TEXT and check the converted output.

    Six URLs are registered as ``{'url': ...}`` rows.  The stylesheet is
    marked done as a CSS link; the image, tubes page and ferret picture are
    marked done with filenames relative to the temp directory.  The
    converted text is checked for the expected relative and absolute
    references.
    """
    root = self.temp_dir.name
    table = URLTable()

    image_path = os.path.join(root, 'image.png')
    tubes_path = os.path.join(root, 'tubes.html')
    ferret_path = os.path.join(root, 'ferret.jpg')

    registered_urls = (
        'http://example.com/styles.css',
        'http://example.com/image.png',
        'http://example.com/cat.jpg',
        'http://example.com/fox.jpg',
        'http://example.com/ferret.jpg',
        'http://example.com/tubes.html',
    )
    table.add_many([{'url': url} for url in registered_urls])

    table.update_one(
        'http://example.com/styles.css',
        status=Status.done,
        link_type='css',
    )

    # URLs that have a local file, in the order they are marked as done.
    saved = (
        ('http://example.com/image.png', image_path),
        ('http://example.com/tubes.html', tubes_path),
        ('http://example.com/ferret.jpg', ferret_path),
    )
    for url, path in saved:
        table.update_one(
            url,
            status=Status.done,
            filename=os.path.relpath(path, root),
        )

    source_path = os.path.join(root, 'index.html')
    target_path = os.path.join(root, 'index.html-new')

    with open(source_path, 'w') as writer:
        writer.write(XHTML_TEXT)

    # Create empty placeholder files at the local paths.
    for _, path in saved:
        with open(path, 'wb'):
            pass

    walker = ElementWalker(css_scraper=CSSScraper())
    html_converter = HTMLConverter(self.get_html_parser(), walker, table)
    html_converter.convert(
        source_path,
        target_path,
        base_url='http://example.com/index.html',
    )

    with open(target_path, 'r') as reader:
        converted_text = reader.read()

    expected_fragments = [
        "url('image.png')",
        "url('http://example.com/cat.jpg')",
        '"tubes.html"',
        '"http://example.com/lol.html"',
        "url('http://example.com/fox.jpg')",
        "url('ferret.jpg')",
        "hello world!!",
        "<hr/>",
    ]
    for fragment in expected_fragments:
        self.assertIn(fragment, converted_text)