def __init__(self, httpResponse, normalizeMarkup=True, verbose=0):
    """
    Initialise both base classes, reset all parser state and parse the
    response.

    :param httpResponse: response object; handed to abstractParser, queried
                         for its charset, and passed to the parsing helpers.
    :param normalizeMarkup: stored unchanged in self._normalizeMarkup.
    :param verbose: forwarded to SGMLParser.__init__.
    """
    abstractParser.__init__(self, httpResponse)
    SGMLParser.__init__(self, verbose)

    # Constants: the tag names and attribute names of interest for URLs
    self._tagsContainingURLs = (
        'go', 'a', 'img', 'link', 'script', 'iframe', 'object', 'embed',
        'area', 'frame', 'applet', 'input', 'base', 'div', 'layer',
        'ilayer', 'bgsound', 'form',
    )
    self._urlAttrs = ('href', 'src', 'data', 'action')

    # Internal state; every container gets its own fresh list
    self._tag_and_url, self._parsed_URLs, self._re_URLs = [], [], []
    self._encoding = httpResponse.getCharset()
    self._forms = []

    # "Currently inside <tag>" flags all start out cleared
    self._insideForm = self._insideSelect = False
    self._insideTextarea = self._insideScript = False

    self._commentsInDocument, self._scriptsInDocument = [], []

    # Meta tags
    self._metaRedirs, self._metaTags = [], []

    self._normalizeMarkup = normalizeMarkup

    # First fill self._re_URLs with url objects ...
    self._regex_url_parse(httpResponse)

    # ... and only then start the real work
    self._preParse(httpResponse)
def __init__(self, httpResponse):
    """
    Initialise the base parser, create the empty result lists and hand the
    response body to self._preParse().

    :param httpResponse: response object; passed to abstractParser.__init__
                         and queried with getBody().
    """
    abstractParser.__init__(self, httpResponse)

    # Result containers start out empty
    self._parsed_URLs, self._re_URLs = [], []

    # Work!
    body = httpResponse.getBody()
    self._preParse(body)
def __init__(self, httpResponse):
    """
    Initialise the base parser, prepare the two URL lists and pre-parse the
    body of the response.

    :param httpResponse: response object; passed to abstractParser.__init__
                         and queried with getBody().
    """
    abstractParser.__init__(self, httpResponse)

    # Two lists with url objects, both empty at this point
    self._parsed_URLs, self._re_URLs = [], []

    # Work!
    document_body = httpResponse.getBody()
    self._preParse(document_body)
def __init__(self, httpResponse):
    """
    Initialise the base parser, create the empty result lists and fetch the
    response body, inflating it first when it is compressed.

    If inflating fails the constructor returns early, leaving both result
    lists empty.

    :param httpResponse: response object; passed to abstractParser.__init__
                         and queried with getBody().
    """
    abstractParser.__init__(self, httpResponse)

    # To store results
    self._parsed_URLs = []
    self._re_URLs = []

    # Work!
    swf = httpResponse.getBody()
    if self._is_compressed(swf):
        try:
            swf = self._inflate(swf)
        except Exception:
            # Deliberate best effort: if the inflate fails there is nothing
            # else to do. The exception object is not needed, so it is not
            # bound (this form is also valid on both Python 2 and 3).
            return

    # NOTE(review): nothing visible here consumes the (possibly inflated)
    # body after this point -- confirm whether a parsing step follows.