Exemplo n.º 1
0
 def _get_src_values(self) -> Set[str]:
     """Gather the normalised ``src`` attribute of every element in the tree.

     Each element carrying a ``src`` attribute is visited once. Its value is
     passed through ``helpers.fix_possible_value`` when ``self.base_url`` is
     truthy and through ``helpers.fix_possible_url`` otherwise.

     Side effect: the ``src`` attribute of every visited element is
     overwritten with an empty string, so the tree is mutated.
     NOTE(review): presumably this stops later passes from re-reporting the
     same value -- confirm against the callers.

     Returns:
         The set of normalised ``src`` values.
     """
     collected = set()
     for element in self._tree.iterfind(".//*[@src]"):
         # The normaliser is chosen per element, exactly as the branch did.
         normalise = (
             helpers.fix_possible_value
             if self.base_url
             else helpers.fix_possible_url
         )
         collected.add(normalise(element.attrib["src"]))
         # Blank the attribute once its value has been captured.
         element.attrib["src"] = ""
     return collected
Exemplo n.º 2
0
    def _get_meta_refresh_values(self) -> Set[str]:
        """Extract the targets of ``url=`` directives from ``<meta>`` tags.

        Every ``<meta>`` element that has both an ``http-equiv`` and a
        ``content`` attribute is inspected. When the content contains
        ``url=`` (matched case-insensitively), everything after the first
        such marker is stripped of surrounding whitespace, normalised with
        ``helpers.fix_possible_value`` and collected.

        The value is sliced right after the ``url=`` marker itself rather
        than after the first ``=`` in the content, so an earlier ``=``
        (for example ``"0;a=b;url=http://x"``) no longer leaks junk into
        the extracted value.

        Returns:
            The set of normalised values found after ``url=`` markers.
        """
        # Function-scope import: the file's import block is not visible here.
        import re

        values = set()

        for tag in self._tree.iterfind(".//meta[@http-equiv][@content]"):
            content = tag.attrib["content"]
            # Search the original string (not a lowercased copy) so that the
            # match offsets are always valid indexes into ``content``.
            marker = re.search("url=", content, flags=re.IGNORECASE)
            if marker is None:
                continue
            target = content[marker.end():].strip()
            values.add(helpers.fix_possible_value(target))

        return values
Exemplo n.º 3
0
    def _get_document_write_contents(self) -> Set[str]:
        """Return the normalised argument text of each document-write snippet.

        For every string produced by ``self._get_document_writes()``, the
        text between the *last* ``(`` and the *first* ``)`` is cut out and
        passed through ``helpers.fix_possible_value``. Results that are
        falsy after normalisation are dropped.

        Returns:
            The set of non-empty normalised contents.
        """
        normalised = set()

        for snippet in self._get_document_writes():
            start = snippet.rfind("(") + 1
            stop = snippet.find(")")
            normalised.add(helpers.fix_possible_value(snippet[start:stop]))

        # ``filter(None, ...)`` keeps only truthy entries.
        return set(filter(None, normalised))
Exemplo n.º 4
0
def test_fix_possible_value():
    """Check ``helpers.fix_possible_value`` on a quoted, backslashed value.

    The input is wrapped in double quotes and contains a backslash and a NUL
    character; the expected output has no quotes, no NUL, and a forward
    slash in place of the backslash.
    """
    raw_value = '"//domain.com\\index\u0000.html"'
    expected = "//domain.com/index.html"

    assert helpers.fix_possible_value(raw_value) == expected
Exemplo n.º 5
0
 def get_base64_urls(self) -> Set[str]:
     """Return the values from ``get_base64_values`` that qualify as URLs.

     Every value yielded by ``self.get_base64_values()`` is first normalised
     with ``helpers.fix_possible_value`` (duplicates collapse at this
     stage). Only the normalised values for which ``URL(value).is_url`` is
     truthy are returned.

     Returns:
         The set of normalised values accepted by ``URL(...).is_url``.
     """
     # Phase 1: normalise everything and de-duplicate.
     candidates = set()
     for raw in self.get_base64_values():
         candidates.add(helpers.fix_possible_value(raw))

     # Phase 2: keep only what the URL wrapper recognises as a URL.
     urls = set()
     for candidate in candidates:
         if URL(candidate).is_url:
             urls.add(candidate)
     return urls