def _highlight_nodes(self, html, nodes):
    """
    Flag the user-selected nodes inside the given page markup.

    For every XPath expression in ``nodes`` the first match (if any) gets
    ``SELECTED_CLASS`` appended to its ``class`` attribute and its ``id``
    attribute replaced by the expression itself. Expressions that match
    nothing are ignored.

    :param html: markup handed to ``XPathExtractor().get_object``.
    :param nodes: iterable of XPath expressions.
    :return: the whole tree serialized by ``etree.tostring`` with
        ``method="html"`` and ``pretty_print=True``.
    """
    document = XPathExtractor().get_object(html)

    for expression in nodes:
        matches = document.xpath(expression)
        if not matches:
            continue

        # Only the first match of each expression is highlighted.
        element = matches[0]
        existing = element.attrib.get("class", "")
        # strip() drops the leading space left when there was no class yet.
        element.attrib["class"] = ("%s %s" % (existing, SELECTED_CLASS)).strip()
        # The expression doubles as the element id (any previous id is lost).
        element.attrib["id"] = expression

    root = document.getroot()
    return etree.tostring(root, pretty_print=True, method="html")
class HTMLFixer(object):
    """
    Rewrite resource references of an HTML document against a base URL.

    The ``href`` of every ``<link>`` and the ``src`` of every ``<img>`` whose
    value is not matched by ``url_regex`` is prefixed with ``url``.
    """

    def __init__(self, url_regex, url, html):
        """
        :param url_regex: object exposing ``match`` (e.g. a compiled regular
            expression); attribute values it matches are left untouched.
            Presumably it recognises absolute URLs -- confirm with the caller.
        :param url: prefix placed, followed by ``/``, in front of every
            non-matching attribute value.
        :param html: markup handed to ``XPathExtractor().get_object``.
        """
        self._url_regex = url_regex
        self.url = url
        self.html_tree = XPathExtractor().get_object(html)

    def get_fixed_html(self):
        """
        Fix ``<link href>`` and ``<img src>`` values and serialize the tree.

        The tree held by the instance is modified in place.

        :return: the document serialized by ``etree.tostring`` with
            ``method="html"`` and ``pretty_print=True``.
        """
        self._fix_tags("link", "href")
        self._fix_tags("img", "src")
        return etree.tostring(self.html_tree.getroot(), pretty_print=True,
                              method="html")

    def _fix_tags(self, tag, attrib):
        """
        Prefix ``attrib`` of every ``tag`` element with ``self.url``.

        Values matched by the URL regex are kept as they are. Elements that
        do not carry ``attrib`` at all are skipped.

        :param tag: element name to look up (``//<tag>``).
        :param attrib: name of the attribute holding the reference.
        """
        elements = self.html_tree.xpath("//%s" % tag)
        for element in elements:
            value = element.attrib.get(attrib)
            if value is None:
                # e.g. <img> without src: nothing to rewrite. Indexing the
                # attribute directly used to raise KeyError here.
                continue
            if not self._url_regex.match(value):
                element.attrib[attrib] = "%s/%s" % (self.url, value)