def test_hyphenate_html(self):
    """Check soft-hyphen insertion over an HTML tree with nested ``lang`` spans.

    The fragment mixes plain text, a comment, and two nested ``<span>``
    elements carrying different ``lang`` attributes.  After hyphenation with
    ``'='`` as the visible hyphen character, the text serialization of the
    tree must equal the expected string below.
    """
    markup = ''' <p>beautiful, <span lang="sv"><!-- x -->tillata\n<span lang="en">Expand</span></span> "latitude!'''
    # The expected text shows every word hyphenated and the comment absent.
    expected = 'beau=ti=ful, tilla=ta\nEx=pand "lat=i=tude!'

    tree = parse_html5(markup, line_numbers=False)
    # The return value is not used; the tree is serialized afterwards.
    add_soft_hyphens_to_html(tree, hyphen_char='=')

    text = etree.tostring(tree, method='text', encoding='unicode')
    self.ae(text, expected)
def get_html_ids(raw_data):
    """Return the set of anchor identifiers found inside ``<body>`` elements.

    ``raw_data`` is parsed with ``parse_html5`` (namespaces discarded, no line
    numbers, newlines left as-is).  For every ``<body>`` element in the result
    two kinds of values are gathered:

    * the ``id`` attribute of the body itself and of any descendant element;
    * the ``name`` attribute of any descendant ``<a>`` element.

    Returns a ``set`` of the collected attribute values; it is empty when the
    parsed tree has no ``<body>`` or no matching attributes.
    """
    attribute_queries = (
        'descendant-or-self::*/@id',
        'descendant::a/@name',
    )
    tree = parse_html5(raw_data, discard_namespaces=True, line_numbers=False, fix_newlines=False)
    found = set()
    for body in tree.xpath('//body'):
        for query in attribute_queries:
            # set.update accepts any iterable, so the XPath result is fed in directly.
            found.update(body.xpath(query))
    return found
def get_html_ids(raw_data):
    """Return the set of anchor identifiers found inside ``<body>`` elements.

    ``raw_data`` is parsed with ``parse_html5`` (namespaces discarded, no line
    numbers, newlines left as-is).  For every ``<body>`` element in the result
    two kinds of values are gathered:

    * the ``id`` attribute of the body itself and of any descendant element;
    * the ``name`` attribute of any descendant ``<a>`` element.

    Returns a ``set`` of the collected attribute values; it is empty when the
    parsed tree has no ``<body>`` or no matching attributes.
    """
    attribute_queries = (
        'descendant-or-self::*/@id',
        'descendant::a/@name',
    )
    tree = parse_html5(raw_data, discard_namespaces=True, line_numbers=False, fix_newlines=False)
    found = set()
    for body in tree.xpath('//body'):
        for query in attribute_queries:
            # set.update accepts any iterable, so the XPath result is fed in directly.
            found.update(body.xpath(query))
    return found
def test_html5lib(self):
    """Smoke-test that html5lib imports and ``parse_html5`` runs.

    There are no explicit assertions: the test passes as long as the imports
    succeed and parsing a minimal fragment does not raise.
    """
    # Import order is kept as-is; the html5lib imports come first.
    import html5lib.html5parser  # noqa
    from html5lib import parse  # noqa

    # Test that we are using the calibre version of html5lib
    from calibre.ebooks.oeb.polish.parsing import parse_html5

    sample_markup = '<p>xxx'
    parse_html5(sample_markup)
def test_html5lib(self):
    """Smoke-test that html5lib imports and ``parse_html5`` runs.

    There are no explicit assertions: the test passes as long as the imports
    succeed and parsing a minimal fragment does not raise.
    """
    # Import order is kept as-is; the html5lib imports come first.
    import html5lib.html5parser  # noqa
    from html5lib import parse  # noqa

    # Test that we are using the calibre version of html5lib
    from calibre.ebooks.oeb.polish.parsing import parse_html5

    sample_markup = '<p>xxx'
    parse_html5(sample_markup)