示例#1
0
    def __init__(self, httpResponse, normalizeMarkup=True, verbose=0):
        """
        Set up the parser state and immediately process the HTTP response.

        :param httpResponse: Response object to parse. It is handed to
            abstractParser.__init__, queried with getCharset(), and then
            passed to _regex_url_parse() and _preParse().
        :param normalizeMarkup: Stored unchanged in self._normalizeMarkup.
        :param verbose: Forwarded to SGMLParser.__init__.
        """
        # Both base classes are initialised explicitly, in this order.
        abstractParser.__init__(self, httpResponse)
        SGMLParser.__init__(self, verbose)

        # Constants: tag names and attribute names
        # (judging by the names, the ones that may carry URLs).
        self._tagsContainingURLs = (
            'go', 'a', 'img', 'link', 'script', 'iframe', 'object',
            'embed', 'area', 'frame', 'applet', 'input', 'base',
            'div', 'layer', 'ilayer', 'bgsound', 'form',
        )
        self._urlAttrs = ('href', 'src', 'data', 'action')

        # Caller-supplied setting
        self._normalizeMarkup = normalizeMarkup

        # Character set reported by the response
        self._encoding = httpResponse.getCharset()

        # "Where am I" flags, all initially off
        self._insideForm = False
        self._insideSelect = False
        self._insideTextarea = False
        self._insideScript = False

        # Result containers; each one is its own empty list
        self._tag_and_url = []
        self._parsed_URLs = []
        self._re_URLs = []
        self._forms = []
        self._commentsInDocument = []
        self._scriptsInDocument = []

        # Meta tag containers
        self._metaRedirs = []
        self._metaTags = []

        # First fill self._re_URLs with url objects ...
        self._regex_url_parse(httpResponse)

        # ... then start the actual work.
        self._preParse(httpResponse)
示例#2
0
 def __init__(self, httpResponse):
     """
     Initialise the parser and process the body of the response.

     :param httpResponse: Response object; it is passed to
         abstractParser.__init__ and its getBody() result is handed
         to _preParse().
     """
     abstractParser.__init__(self, httpResponse)

     # Result containers (two separate lists)
     self._parsed_URLs = []
     self._re_URLs = []

     # Fetch the body and start working on it
     body = httpResponse.getBody()
     self._preParse(body)
示例#3
0
 def __init__(self, httpResponse):
     """
     Prepare the URL containers and run _preParse() on the response body.

     :param httpResponse: Response object; forwarded to
         abstractParser.__init__ and read through getBody().
     """
     abstractParser.__init__(self, httpResponse)

     # Two lists with url objects (distinct list instances)
     self._parsed_URLs, self._re_URLs = [], []

     # Work on the response body
     response_body = httpResponse.getBody()
     self._preParse(response_body)
示例#4
0
 def __init__(self, httpResponse):
     abstractParser.__init__(self , httpResponse)
     
     # To store results
     self._parsed_URLs = []
     self._re_URLs = []
     
     # work !
     swf = httpResponse.getBody()
     if self._is_compressed( swf ):
         try:
             swf = self._inflate( swf )
         except Exception, e:
             # If the inflate fails... there is nothing else to do.
             return