示例#1
0
    def getDocumentParserFor( self, httpResponse, normalizeMarkup=True ):
        '''
        Return a document parser for httpResponse, reusing a cached one when possible.

        Parsers are stored in self._cache under a key derived from the response body
        *and* the normalizeMarkup flag, so a parser that was built with one
        normalization setting is never handed back to a caller that asked for the other.

        @parameter httpResponse: The response to parse. Only getBody() is read here; on a
            cache miss the whole object is passed to documentParser.documentParser.
        @parameter normalizeMarkup: Forwarded to documentParser.documentParser when a new
            parser has to be created; also part of the cache key.
        @return: The cached parser, or a newly created one (which is then cached).
            Exceptions raised by documentParser.documentParser are not handled here; they
            propagate to the caller and nothing is cached.
        '''
        #   Why the built-in hash() and not md5/sha1/crc32/adler32: the original author
        #   benchmarked them with timeit on a ~3.7KB string and measured roughly
        #   0.117 usec for hash() against 3.87 usec (zlib.adler32), 6.03 usec (zlib.crc32),
        #   12.9 usec (hashlib.md5) and 16.6 usec (hashlib.sha1).
        #
        #   hash() can collide, but the original author judged the real probability of
        #   a collision to be too low to matter because the LRU only holds about 30 entries.
        cache_key = hash( ( httpResponse.getBody(), bool( normalizeMarkup ) ) )

        # The lookup and the insertion are done while holding the same lock, so a parser
        # for a given key is created and stored as one atomic step.
        with self._LRULock:
            if cache_key in self._cache:
                return self._cache[ cache_key ]

            # Cache miss: create a new parser instance and remember it.
            parser = documentParser.documentParser( httpResponse, normalizeMarkup )
            self._cache[ cache_key ] = parser
            return parser
示例#2
0
 def _get_images( self, fuzzable_request ):
     '''
     Fetch the page behind fuzzable_request and hash every image it references.

     The page and each image are requested with useCache=False. Failures are
     best-effort: if the page cannot be retrieved or parsed an empty map is
     returned, and an image that cannot be retrieved is simply skipped.

     @parameter fuzzable_request: The request whose URI is fetched; it is not modified.
     @return: A map with the img src as a key, and the SHA-1 hex digest of the image
         contents as the value. Only responses for which is_image() is true are included.
     '''
     # Local import: hashlib.sha1 produces the same digest as the deprecated "sha"
     # module, and importing it here keeps this method self-contained.
     import hashlib

     res = {}

     try:
         response = self._urlOpener.GET( fuzzable_request.getURI(), useCache=False )
     except Exception:
         # "except Exception" (instead of a bare except) keeps this best-effort while
         # still letting KeyboardInterrupt / SystemExit reach the caller.
         om.out.debug('Failed to retrieve the page for finding captchas.')
         return res

     # Do not use dpCache here, it's no good (note kept from the original author; the
     # reason is not recorded). A fresh parser is built for this response instead.
     try:
         document_parser = documentParser.documentParser( response )
     except w3afException:
         om.out.debug('Failed to parse the page for finding captchas.')
         return res

     image_list = document_parser.getReferencesOfTag('img')
     image_list = [ urlParser.uri2url(i) for i in image_list ]

     for img_src in image_list:
         # TODO: Use self._tm.startFunction
         try:
             image_response = self._urlOpener.GET( img_src, useCache=False )
         except Exception:
             om.out.debug('Failed to retrieve the image for finding captchas.')
             continue

         if image_response.is_image():
             res[ img_src ] = hashlib.sha1( image_response.getBody() ).hexdigest()

     return res