def __init__(self, httpResponse):
    """Pick the parser that matches *httpResponse* and keep it in ``self._parser``.

    The checks run in a deliberate order:

    1. Images are rejected first, since they account for a large share
       of the URLs in a site and have no parser.
    2. WML is tested next because it is a very specific match.
    3. Text/HTML follows; it is very generic, so testing it before WML
       would mean the WML branch is never reached.
    4. PDF and SWF are tested last.

    :param httpResponse: the response to build a parser for.
    :raises w3afException: if the response is an image, or if none of
        the checks above matches it.
    """
    # Cheap rejection of the most common non-parseable content.
    if httpResponse.is_image():
        raise w3afException('There is no parser for images.')

    # Select the parser class only; it is instantiated once, below.
    if self._isWML(httpResponse):
        parser_class = wmlParser.wmlParser
    elif httpResponse.is_text_or_html():
        parser_class = htmlParser.HTMLParser
    elif self._isPDF(httpResponse):
        parser_class = pdfParser.pdfParser
    elif self._isSWF(httpResponse):
        parser_class = swfParser.swfParser
    else:
        raise w3afException(
            'There is no parser for "%s".' % httpResponse.getURL())

    self._parser = parser_class(httpResponse)
def __init__(self, httpResponse, normalizeMarkup=True):
    """Build the parser matching *httpResponse* and store it in ``self._parser``.

    The response is tested as WML, then PDF, then SWF, and finally as
    generic text/HTML; the first check that succeeds decides the parser.

    :param httpResponse: the response to build a parser for.
    :param normalizeMarkup: forwarded unchanged to the HTML parser's
        constructor; unused by the other parsers.
        NOTE(review): presumably toggles markup clean-up before parsing;
        confirm against htmlParser.
    :raises w3afException: if none of the checks matches the response.
    """
    if self._isWML(httpResponse):
        self._parser = wmlParser.wmlParser(httpResponse)
        return

    if self._isPDF(httpResponse):
        self._parser = pdfParser.pdfParser(httpResponse)
        return

    if self._isSWF(httpResponse):
        self._parser = swfParser.swfParser(httpResponse)
        return

    # Generic text/HTML is tested last, after the specific formats.
    if httpResponse.is_text_or_html():
        self._parser = htmlParser.htmlParser(httpResponse, normalizeMarkup)
        return

    no_parser_msg = 'There is no parser for "%s".' % httpResponse.getURL()
    raise w3afException(no_parser_msg)