def loadbytes(self, data):
    """Build a processed tree from raw document bytes.

    ``data`` is parsed with ``html_document_fromstring`` using
    ``self.encoding``; the WebAnnotator title is applied, tags are pruned
    when ``self.known_entities`` is truthy, the entities found in the tree
    are processed, and the value returned by ``self._cleanup_tree`` is
    handed back to the caller.
    """
    # Cleanup is intentionally the very last step: running it earlier
    # would let custom cleaners clean the WebAnnotator markup.
    root = html_document_fromstring(data, encoding=self.encoding)
    webannotator.apply_wa_title(root)

    if self.known_entities:
        self._prune_tags(root)

    self._process_entities(self._get_entities(root))
    return self._cleanup_tree(root)
def test_wa_title(self):
    """Check ``apply_wa_title`` on a document carrying a ``<wa-title>``.

    After the call the tree must equal a document whose ``<title>`` holds
    the former ``<wa-title>`` content and which has no ``<wa-title>``
    element.
    """
    source = b""" <html> <head><title>Foo</title></head> <body>contents</body> <wa-title><b>hello</b>, world</wa-title> </html> """
    expected = b""" <html> <head><title><b>hello</b>, world</title></head> <body>contents</body> </html> """

    actual_tree = html_document_fromstring(source)
    webannotator.apply_wa_title(actual_tree)

    expected_tree = html_document_fromstring(expected)
    self.assertHtmlTreeEqual(actual_tree, expected_tree)
def assertApplyWaTitle(self, source, result):
    """Assert that ``apply_wa_title`` turns ``source`` into ``result``.

    Both arguments are passed to ``html_document_fromstring``; the tree
    parsed from ``source`` is modified by ``webannotator.apply_wa_title``
    and then compared with the tree parsed from ``result`` via
    ``assertHtmlTreeEqual``.
    """
    actual_tree = html_document_fromstring(source)
    webannotator.apply_wa_title(actual_tree)

    expected_tree = html_document_fromstring(result)
    self.assertHtmlTreeEqual(actual_tree, expected_tree)