def fix_urls(el: Element, base_url: str, broken_urls: List[str],
             urls_to_change: dict) -> Tuple[Element, List[str]]:
    """
    Given an HTML element, turns all ``href`` parameters of ``a`` elements
    inside it into fully-qualified absolute URLs instead of the relative
    paths that are common in the tips content.

    ``el`` is modified in place: every descendant ``a`` element that has an
    ``href`` attribute gets that attribute overwritten with the value
    returned by ``fix_url``. ``el`` itself is not inspected, only its
    descendants.

    :arg Element el: ``lxml.html.Element`` object, the content to change.
    :arg str base_url: The URL for the page, which serves as the absolute
        point with which to calculate the absolute paths.
    :arg list broken_urls: The list of broken URLs to add to as we find
        them.
    :arg dict[str, str] urls_to_change: Known broken URLs and their
        replacements.
    :rtype: tuple[Element, list]
    :returns: The Element with its ``a`` elements altered, and the list of
        broken URLs as last returned by ``fix_url`` (or the list passed in
        when no link was processed).
    """
    # Threaded through every ``fix_url`` call and rebound to what it returns;
    # presumably lets ``fix_url`` skip URLs it has already checked -- confirm
    # against ``fix_url``.
    tested_urls = []  # type: List[str]

    for desc in el.iterdescendants():
        # Only anchors that actually carry a link are rewritten.
        if desc.tag != "a" or "href" not in desc.attrib:
            continue

        fixed_url, tested_urls, broken_urls = fix_url(
            base_url,
            desc.attrib["href"],
            tested_urls,
            broken_urls,
            urls_to_change)
        desc.attrib["href"] = fixed_url

    return (el, broken_urls)
def inner_html(node: Element) -> str:
    """
    Serialise the content of ``node`` (everything between its opening and
    closing tags) without the tags of ``node`` itself.

    The result is the node's leading text followed by the serialisation of
    each *direct* child. Each child is serialised together with its whole
    subtree, so only direct children are iterated; walking all descendants
    would emit nested elements more than once.

    :arg Element node: The element whose content should be serialised.
        Assumed to be an lxml element, with ``etree`` being ``lxml.etree``
        -- confirm against the file's imports.
    :rtype: str
    :returns: The markup contained in ``node``; an empty string when the
        node has neither text nor children.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    from html import escape

    parts = []

    # Text that precedes the first child lives on the node itself and has to
    # be added by hand. It is stored unescaped, so escape ``&``, ``<`` and
    # ``>`` to stay consistent with the serialised children.
    if node.text:
        parts.append(escape(node.text, quote=False))

    # lxml's ``tostring`` includes an element's tail text by default, so the
    # text that sits between and after the children is kept.
    parts.extend(
        etree.tostring(child, encoding="utf-8").decode("utf-8")
        for child in node.iterchildren()
    )

    return ''.join(parts)