def _re_extract(self, swf_body):
    """
    Run the regex-based extractor over a body and merge what it finds
    into self._re_urls.

    :param swf_body: The content handed to ReExtract as the text to scan.
    """
    extractor = ReExtract(swf_body, self._base_url, self._encoding)
    extractor.parse()

    # Merge (rather than replace) so references collected earlier survive.
    found = extractor.get_references()
    self._re_urls.update(found)
def parse(self):
    """
    Extract URL references from the HTTP response using ReExtract.

    The response body is first passed through pdf_to_text(); the result
    of that call is what gets scanned. The references found replace the
    current value of self._re_urls.
    """
    raw_body = self.get_http_response().get_body()
    text = pdf_to_text(raw_body)

    extractor = ReExtract(text, self._base_url, self._encoding)
    extractor.parse()

    self._re_urls = extractor.get_references()
def parse(self):
    """
    Extract URL references from the HTTP response body using ReExtract.

    The extractor is created with require_quotes=True. The references
    found replace the current value of self._re_urls.
    """
    body = self.get_http_response().get_body()

    extractor = ReExtract(
        body,
        self._base_url,
        self._encoding,
        require_quotes=True,
    )
    extractor.parse()

    self._re_urls = extractor.get_references()
def test_relative_regex(self):
    """
    A relative path containing '../' segments, surrounded by unrelated
    tokens, must be reported as a single URL resolved against the base URL.
    """
    base_url = URL('https://w3af.org/abc/def/')
    text = '123 ../../foobar/uploads/foo.png 465'

    extractor = ReExtract(text, base_url, 'utf-8')
    extractor.parse()

    expected = [URL('https://w3af.org/foobar/uploads/foo.png')]
    self.assertEqual(extractor.get_references(), expected)
def _handle_script_tag_start(self, tag, tag_name, attrs):
    """
    Handle the start of a script tag.

    Delegates to SGMLParser first, then, when the tag carries text, scans
    the stripped text with ReExtract and merges the references found into
    self._re_urls.

    :param tag: The tag object; its .text attribute is read here.
    :param tag_name: Forwarded unchanged to SGMLParser.
    :param attrs: Forwarded unchanged to SGMLParser.
    """
    SGMLParser._handle_script_tag_start(self, tag, tag_name, attrs)

    # Nothing to scan when the tag has no text.
    if tag.text is None:
        return

    script_text = tag.text.strip()
    extractor = ReExtract(script_text, self._base_url, self._encoding)
    extractor.parse()
    self._re_urls.update(extractor.get_references())
def _get_references_regex(self, mutant, mutant_response): """ Apply regular expressions to extract links from the HTTP response body. :param mutant: The request used to upload the file :param mutant_response: The HTTP response to parse :return: References (links) found in the HTTP response that end with the uploaded filename. """ # Quick performance improvement if mutant.uploaded_file_name not in mutant_response.get_body(): return [] # Apply the regular expressions and extract links re_extract = ReExtract(mutant_response.get_body(), mutant_response.get_uri(), mutant_response.get_charset()) re_extract.parse() return re_extract.get_references()