Exemplo n.º 1
0
 def _pre_parse(self, http_resp):
     '''
     Delegate the pre-parse step to SGMLParser._pre_parse and then check
     that a base URL is available on this instance.

     @parameter http_resp: The HTTP response document that contains the
     HTML document inside its body.

     @raise AssertionError: When self._baseUrl is falsy once the
     SGMLParser pre-parse step has finished (only while assertions are
     enabled).
     '''
     SGMLParser._pre_parse(self, http_resp)

     # Read the attribute only after the delegated call has run, exactly
     # like the single-line assert this replaces.
     base_url = self._baseUrl
     assert base_url, 'The base URL must be set.'
Exemplo n.º 2
0
    def _pre_parse(self, httpResponse):
        '''
        Delegate the pre-parse step to SGMLParser._pre_parse and then check
        that a base URL is available on this instance.

        @parameter httpResponse: The HTTP response document that contains the
        WML document inside its body.

        @raise AssertionError: When self._baseUrl is still None once the
        SGMLParser pre-parse step has finished (only while assertions are
        enabled).

        Set up the objects shared by the examples below,
        >>> from core.data.url.httpResponse import httpResponse as httpResponse
        >>> u = url_object('http://www.w3af.com/')

        Parse a simple form,
        >>> form = """
        ...    <go method="post" href="dataReceptor.php">
        ...        <postfield name="clave" value="$(clave)"/>
        ...        <postfield name="cuenta" value="$(cuenta)"/>
        ...        <postfield name="tipdat" value="D"/>
        ...    </go>"""
        >>> response = httpResponse( 200, form, {}, u, u )
        >>> w = wmlParser(response)
        >>> w.getForms()
        [Form({'clave': ['$(clave)'], 'cuenta': ['$(cuenta)'], 'tipdat': ['D']})]

        Get the simplest link,
        >>> response = httpResponse( 200, '<a href="/index.aspx">ASP.NET</a>', {}, u, u )
        >>> w = wmlParser( response )
        >>> re, parsed = w.getReferences()

        #
        #    TODO:
        #        It is unclear why the results of the example above show up
        #        in "re"; they should really be inside the "parsed" list.
        #        The checks below stay disabled until that is understood.
        #
        #    >>> re
        #    []
        #    >>> parsed[0].url_string
        #    u'http://www.w3af.com/index.aspx'

        Get a link by applying regular expressions,
        >>> response = httpResponse(200, 'header /index.aspx footer', {}, u, u)
        >>> w = wmlParser( response )
        >>> re, parsed = w.getReferences()
        >>> #
        >>> # TODO: Shouldn't this be the other way around?!
        >>> #
        >>> re
        []
        >>> parsed[0].url_string
        u'http://www.w3af.com/index.aspx'
        '''
        SGMLParser._pre_parse(self, httpResponse)

        # Read the attribute only after the delegated call has run, exactly
        # like the single-line assert this replaces.
        base_url = self._baseUrl
        assert base_url is not None, 'The base URL must be set.'