def _pre_parse(self, http_resp):
    '''
    Run the generic SGMLParser pre-parse step for this response and then
    verify that a base URL is available on the instance.

    The base URL check happens only after the parent call; presumably the
    parent is what populates self._baseUrl (not visible from here, confirm
    against SGMLParser).

    @parameter http_resp: The HTTP response document that contains the
    HTML document inside its body.
    '''
    # Explicit parent call, as in the original: do not assume that
    # SGMLParser supports super().
    SGMLParser._pre_parse(self, http_resp)

    # Truthiness check: an unset or "empty" base URL fails here.
    assert self._baseUrl, 'The base URL must be set.'
def _pre_parse(self, httpResponse):
    '''
    Run the generic SGMLParser pre-parse step for this response and then
    verify that the instance's base URL is not None.

    @parameter httpResponse: The HTTP response document that contains the
    WML document inside its body.

    Init,
    >>> from core.data.url.httpResponse import httpResponse as httpResponse
    >>> u = url_object('http://www.w3af.com/')

    Parse a simple form,
    >>> form = """
    ...    <go method="post" href="dataReceptor.php">
    ...        <postfield name="clave" value="$(clave)"/>
    ...        <postfield name="cuenta" value="$(cuenta)"/>
    ...        <postfield name="tipdat" value="D"/>
    ...    </go>"""
    >>> response = httpResponse( 200, form, {}, u, u )
    >>> w = wmlParser(response)
    >>> w.getForms()
    [Form({'clave': ['$(clave)'], 'cuenta': ['$(cuenta)'], 'tipdat': ['D']})]

    Get the simplest link
    >>> response = httpResponse( 200, '<a href="/index.aspx">ASP.NET</a>', {}, u, u )
    >>> w = wmlParser( response )
    >>> re, parsed = w.getReferences()

    #
    #    TODO:
    #        I don't really understand why I'm getting results @ the "re".
    #        They should really be inside the "parsed" list.
    #
    #    >>> re
    #    []
    #    >>> parsed[0].url_string
    #    u'http://www.w3af.com/index.aspx'

    Get a link by applying regular expressions
    >>> response = httpResponse(200, 'header /index.aspx footer', {}, u, u)
    >>> w = wmlParser( response )
    >>> re, parsed = w.getReferences()
    >>> #
    >>> # TODO: Shouldn't this be the other way around?!
    >>> #
    >>> re
    []
    >>> parsed[0].url_string
    u'http://www.w3af.com/index.aspx'
    '''
    # Explicit parent call, as in the original: do not assume that
    # SGMLParser supports super().
    SGMLParser._pre_parse(self, httpResponse)

    # Identity check against None only; a falsy-but-not-None base URL
    # passes this assertion.
    assert self._baseUrl is not None, 'The base URL must be set.'