def _pre_parse(self, http_resp):
    """
    Run SGMLParser's pre-parse step and then check that a base URL is set.

    :param http_resp: The HTTP response document that contains the HTML
                      document inside its body.
    :raises AssertionError: If self._base_url is falsy once the parent
                            step has finished (only when assertions are
                            enabled).
    """
    SGMLParser._pre_parse(self, http_resp)
    # Truthiness check: any falsy self._base_url fails here, not just None.
    assert self._base_url, 'The base URL must be set.'
def _pre_parse(self, HTTPResponse):
    """
    Run SGMLParser's pre-parse step and then check that a base URL is set.

    :param HTTPResponse: The HTTP response document that contains the WML
                         document inside its body.
    :raises AssertionError: If self._base_url is None once the parent step
                            has finished (only when assertions are
                            enabled).
    """
    SGMLParser._pre_parse(self, HTTPResponse)
    # Identity check: only None fails here, other falsy values pass.
    assert self._base_url is not None, 'The base URL must be set.'
def __init__(self, http_resp):
    """
    Run SGMLParser's constructor with "_parse" temporarily disabled.

    While the parent constructor runs, self._parse is replaced by a
    function that accepts one argument and does nothing. The original
    "_parse" is put back afterwards, even if the parent constructor
    raises.

    :param http_resp: The HTTP response passed on to SGMLParser.__init__.
    """
    # Save "_parse" reference
    orig_parse = self._parse

    # Monkeypatch it, so any call made to it during the parent's __init__
    # is a no-op
    self._parse = lambda arg: None

    try:
        # Now call parent's __init__
        SGMLParser.__init__(self, http_resp)
    finally:
        # Restore it unconditionally; without the finally, a failing parent
        # __init__ would leave the instance with the no-op "_parse"
        self._parse = orig_parse
def _handle_textarea_tag_end(self, tag):
    """
    Handler for textarea end tag.

    After the parent handler runs, the textarea's name and collected data
    are packed into a dict. With at least one form in self._forms the
    dict's items are added as an input to the most recent form; otherwise
    the dict is queued in self._saved_inputs.

    :param tag: Passed through to SGMLParser._handle_textarea_tag_end.
    """
    SGMLParser._handle_textarea_tag_end(self, tag)

    textarea_attrs = {
        'name': self._textarea_tag_name,
        'value': self._textarea_data,
    }

    if self._forms:
        # Attach to the form that was opened last
        self._forms[-1].add_input(textarea_attrs.items())
        return

    # No form seen yet: keep it around for later
    self._saved_inputs.append(textarea_attrs)
def _handle_textarea_tag_end(self, tag):
    """
    Handler for textarea end tag.

    Runs the parent handler, then records the textarea as a name/value
    pair: on the last form in self._forms when there is one, or in
    self._saved_inputs when no form exists yet.

    :param tag: Passed through to SGMLParser._handle_textarea_tag_end.
    """
    SGMLParser._handle_textarea_tag_end(self, tag)

    name_and_value = {"name": self._textarea_tag_name,
                      "value": self._textarea_data}

    have_form = bool(self._forms)
    if have_form:
        current_form = self._forms[-1]
        current_form.add_input(name_and_value.items())
    else:
        # Outside of any form so far, save it for later
        self._saved_inputs.append(name_and_value)
def __init__(self, http_resp):
    """
    Prepare the form-parsing state and then run SGMLParser's constructor.

    All attributes are assigned before the parent constructor is called.

    :param http_resp: The HTTP response; its get_url() result is stored in
                      self._source_url and the object itself is passed on
                      to SGMLParser.__init__.
    """
    # <select> elems parsing state
    self._selects = []

    # <textarea> elems parsing state
    self._textarea_tag_name = self._textarea_data = ""

    # Input tags found outside of the scope of a form tag are kept here
    self._saved_inputs = []

    # Kept for use in form parsing
    self._source_url = http_resp.get_url()

    # Parent's __init__ goes last
    SGMLParser.__init__(self, http_resp)
def _handle_select_tag_end(self, tag):
    """
    Handler for select end tag.

    After the parent handler runs, and only when self._forms is not empty,
    every (name, options) pair in self._selects is added to the most recent
    form and self._selects is emptied. With no form available,
    self._selects is left untouched.

    :param tag: Passed through to SGMLParser._handle_select_tag_end.
    """
    SGMLParser._handle_select_tag_end(self, tag)

    if not self._forms:
        return

    target_form = self._forms[-1]

    for select_name, options in self._selects:
        # add_select is handed a list of tuples, one tuple of items per
        # option
        option_tuples = [tuple(option.items()) for option in options]
        target_form.add_select(select_name, option_tuples)

    # Start over with an empty selects container
    self._selects = []
def _handle_form_tag_start(self, tag, attrs):
    """
    Handle the form tags.

    A FormParameters object is built from the tag attributes and appended
    to self._forms. Afterwards the "pending inputs" stored in
    self._saved_inputs are parsed as if they belonged to this form, and
    the list is emptied.

    :param tag: Passed through to SGMLParser._handle_form_tag_start.
    :param attrs: Attributes of the form tag. 'method' (default 'GET'),
                  'action' and 'enctype' (default DEFAULT_FORM_ENCODING)
                  are read from it.
    """
    SGMLParser._handle_form_tag_start(self, tag, attrs)

    # Raw attribute values
    http_method = attrs.get('method', 'GET').upper()
    raw_action = attrs.get('action', None)
    enctype = attrs.get('enctype', DEFAULT_FORM_ENCODING)

    # Work out the URL the form points to
    if raw_action is not None:
        decoded_action = self._decode_url(raw_action)
        try:
            target_url = self._base_url.url_join(decoded_action,
                                                 encoding=self._encoding)
        except ValueError:
            # The action holds an invalid URL. The best guess available is
            # the URL of the current document.
            target_url = self._source_url
    else:
        # No action attribute at all: use the URL of the current document
        target_url = self._source_url

    # Build the form object and keep it for later use
    new_form = FormParameters(encoding=self._encoding)
    new_form.set_method(http_method)
    new_form.set_action(target_url)
    new_form.set_form_encoding(enctype)
    self._forms.append(new_form)

    # Inputs that showed up before any form tag are handled now, exactly
    # as if they had been found AFTER the form tag opening
    for pending_attrs in self._saved_inputs:
        if not isinstance(pending_attrs, dict):
            continue
        self._handle_input_tag_inside_form('input', pending_attrs)

    # Everything was processed, drop the pending entries
    self._saved_inputs = []
def __init__(self, http_response):
    """
    Set the select-tag state and then run SGMLParser's constructor.

    :param http_response: The HTTP response passed on to
                          SGMLParser.__init__.
    """
    # Assigned before the parent constructor is called.
    # NOTE(review): presumably the name of the <select> currently being
    # parsed - confirm against the select tag handlers.
    self._select_tag_name = ""
    SGMLParser.__init__(self, http_response)
def __init__(self, HTTPResponse):
    """
    Set the select-tag state and then run SGMLParser's constructor.

    :param HTTPResponse: The HTTP response passed on to
                         SGMLParser.__init__.
    """
    # Assigned before the parent constructor is called.
    # NOTE(review): presumably the name of the <select> currently being
    # parsed - confirm against the select tag handlers.
    self._select_tag_name = ""
    SGMLParser.__init__(self, HTTPResponse)