def formvalue(formname, fieldname, value):
    """
    >> formvalue <formname> <field> <value>

    Set the value of a form field.

    Note on lists: for lists that allow multiple selection, 'formvalue'
    *adds* the given value to the current selection.

    Forms are matched against 'formname' as follows:
      1. regexp match to actual form name;
      2. if 'formname' is an integer, it's tried as an index.

    Form controls are matched against 'fieldname' as follows:
      1. unique exact match to control name;
      2. unique regexp match to control name;
      3. if fieldname is an integer, it's tried as an index;
      4. unique & exact match to submit-button values.

    Read-only fields are left untouched unless the
    'readonly_controls_writeable' option has been switched on with the
    'config' command.

    'formvalue' is available as 'fv' as well.

    Raises TwillAssertionError when no form matches, and TwillException
    when the field turns out to be a file-upload field.
    """
    form = browser.get_form(formname)
    if form is None:
        raise TwillAssertionError("no matching forms!")

    control = browser.get_form_field(form, fieldname)
    browser.clicked(form, control)

    # Checkbox groups bypass the read-only handling altogether.
    if not isinstance(control, html.CheckboxGroup):
        is_readonly = 'readonly' in control.attrib.keys()
        if is_readonly and _options['readonly_controls_writeable']:
            print >> OUT, 'forcing read-only form field to writeable'
            del control.attrib['readonly']
        elif is_readonly or \
                (hasattr(control, 'type') and control.type == 'file'):
            print >> OUT, 'form field is read-only or ignorable; nothing done.'
            return

    if hasattr(control, 'type') and control.type == 'file':
        raise TwillException(
            'form field is for file upload; use "formfile" instead'
        )

    set_form_control_value(control, value)
def make_boolean(value):
    """
    Convert the input value into a boolean.

    The value is stringified, lower-cased and stripped, then interpreted
    as one of: 'true'/'false', an integer (zero is False, anything else
    is True), '+'/'-', or 'on'/'off'.

    >> make_boolean('true')
    True
    >> make_boolean('false')
    False
    >> make_boolean('1')
    True
    >> make_boolean('0')
    False
    >> make_boolean('+')
    True
    >> make_boolean('-')
    False

    Raises TwillException when the text matches none of these forms.
    """
    text = str(value).lower().strip()

    # None of these keywords parses as an integer, so checking them before
    # the numeric form cannot change the outcome.
    keywords = {
        'true': True, 'false': False,
        '+': True, '-': False,
        'on': True, 'off': False,
    }
    if text in keywords:
        return keywords[text]

    # 0/nonzero
    try:
        number = int(text)
    except ValueError:
        pass
    else:
        return bool(number)

    raise TwillException("unable to convert '%s' into true/false" % (text, ))
def config(key=None, value=None):
    """
    >> config [<key> [<int value>]]

    Configure/report various options.  With no <key>, print every current
    setting; with a <key> but no <value>, print that key's value; with
    both, store the value (converted to a boolean) under the key.

    So far:

     * 'acknowledge_equiv_refresh', default 1 -- follow HTTP-EQUIV=REFRESH
     * 'readonly_controls_writeable', default 0 -- make ro controls writeable
     * 'require_tidy', default 0 -- *require* that tidy be installed
     * 'use_BeautifulSoup', default 1 -- use the BeautifulSoup parser
     * 'use_tidy', default 1 -- use tidy, if it's installed
     * 'with_default_realm', default 0 -- use a default realm for HTTP AUTH

    Deprecated:
     * 'allow_parse_errors' has been removed.

    Raises TwillException for an unknown key.
    """
    import utils

    # No key: dump all settings in sorted order.
    if key is None:
        print >> OUT, 'current configuration:'
        for name in sorted(_options.keys()):
            print >> OUT, '\t%s : %s' % (name, _options[name])
        print >> OUT, ''
        return

    current = _options.get(key)
    if current is None:
        print >> OUT, '*** no such configuration key', key
        print >> OUT, 'valid keys are:', ";".join(_options.keys())
        raise TwillException('no such configuration key: %s' % (key, ))

    if value is None:
        # Report only.
        print >> OUT, ''
        print >> OUT, 'key %s: value %s' % (key, current)
        print >> OUT, ''
    else:
        _options[key] = utils.make_boolean(value)
def formfile(formname, fieldname, filename, content_type=None):
    """
    >> formfile <form> <field> <filename> [ <content_type> ]

    Upload a file via an "upload file" form field.

    Forward slashes in <filename> are replaced with the platform's path
    separator before the file is opened.

    Raises TwillAssertionError when no form matches <form>, and
    TwillException when the matched field is not a file-upload field.
    """
    import os.path
    filename = filename.replace('/', os.path.sep)

    form = browser.get_form(formname)
    if form is None:
        # Same error as 'formvalue'; without this check the field lookup
        # below would fail with an unhelpful AttributeError on None.
        raise TwillAssertionError("no matching forms!")

    control = browser.get_form_field(form, fieldname)

    if not control.is_of_kind('file'):
        raise TwillException('ERROR: field is not a file upload field!')

    browser.clicked(form, control)

    # NOTE(review): the handle is handed to the control and left open here;
    # presumably it is read when the form is submitted -- confirm before
    # adding any close().
    fp = open(filename, 'rb')
    control.add_file(fp, content_type, filename)

    print >> OUT, '\nAdded file "%s" to file upload field "%s"\n' % (filename, control.name,)
def debug(what, level):
    """
    >> debug <what> <level>

    <what> can be:
       * http (any level >= 1), to display the HTTP transactions.
       * commands (any level >= 1), to display the commands being executed.
       * equiv-refresh (any level >= 1) to display HTTP-EQUIV refresh handling.

    <level> may be an integer or anything make_boolean understands
    (true/false, on/off, +/-); boolean forms map to level 1 or 0.

    Raises TwillException for an unknown <what>.
    """
    import parse

    try:
        level = int(level)
    except ValueError:
        # Not numeric: interpret as a boolean flag.
        level = 1 if utils.make_boolean(level) else 0

    print >> OUT, 'DEBUG: setting %s debugging to level %d' % (what, level)

    enabled = bool(level)
    if what == "http":
        # @BRT: Tries to set mechanize browser debug level directly;
        # @CTB not something supported by requests?
        # browser._browser.set_debug_http(level)
        pass
    elif what == 'equiv-refresh':
        utils._debug_print_refresh = enabled
    elif what == 'commands':
        parse.debug_print_commands(enabled)
    else:
        raise TwillException('unknown debugging type: "%s"' % (what,))
def submit(self, fieldname=None):
    """
    Submit the currently clicked form using the given field.

    If no form has been selected and the page has exactly one form, that
    form is used.  With no 'fieldname', the last submit button clicked is
    used, falling back to the first submit control in the form; if there
    is none, the form is submitted without a button.

    Raises TwillException when the page has no forms, or when no form is
    selected and there is not exactly one to choose from.
    """
    if fieldname is not None:
        fieldname = str(fieldname)

    if not self.get_all_forms():
        raise TwillException("no forms on this page!")

    form = self._browser.form
    if form is None:
        candidates = list(self.get_all_forms())
        if len(candidates) != 1:
            raise TwillException("""\
more than one form; you must select one (use 'fv') before submitting\
""")
        form = candidates[0]

    # Pick the control to submit with.
    button = None
    if fieldname:
        # fieldname given; find it.
        button = self.get_form_field(form, fieldname)
    elif self.last_submit_button:
        button = self.last_submit_button
    else:
        # first submit button in the form, if any.
        for candidate in form.controls:
            if isinstance(candidate, ClientForm.SubmitControl):
                button = candidate
                break

    #
    # build the request object that will be sent in the form submission.
    #
    if button:
        print >> OUT, """\
Note: submit is using submit button: name="%s", value="%s"
""" % (button.name, button.value)

        # image controls are clicked at a coordinate pair.
        if isinstance(button, ClientForm.ImageControl):
            click_arg = (1,1)
        else:
            click_arg = True
        request = button._click(form, click_arg, "", mechanize.Request)
    else:
        # submit w/o submit button.
        request = form._click(None, None, None, None, 0, None,
                              "", mechanize.Request)

    #
    # add referer information.  this may require upgrading the
    # request object to have an 'add_unredirected_header' function.
    #
    upgrader = self._browser._ua_handlers.get('_http_request_upgrade')
    if upgrader:
        request = upgrader.http_request(request)

    request = self._browser._add_referer_header(request)

    #
    # now actually GO.
    #
    self._journey('open', request)
def get_form_field(self, form, fieldname):
    """
    Return the control that matches 'fieldname'.  Must be
    a *unique* regexp/exact string match.

    Lookup order: exact id match, exact name match (a unique name match
    overrides an id match), 1-based index into form.controls, regexp
    search against control names, and finally exact value match among
    read-only controls.

    Raises TwillException when nothing matches or when the only matches
    found were ambiguous.
    """
    fieldname = str(fieldname)

    found = None
    found_multiple = False

    # test exact match on id.
    matches = [ c for c in form.controls if str(c.id) == fieldname ]
    if matches:
        if unique_match(matches):
            found = matches[0]
        else:
            found_multiple = True   # record for error reporting.

    # test exact match on name.
    matches = [ c for c in form.controls if str(c.name) == fieldname ]
    if matches:
        if unique_match(matches):
            found = matches[0]
        else:
            found_multiple = True   # record for error reporting.

    # test index (1-based).
    if found is None:
        clickies = [ c for c in form.controls ]
        try:
            fieldnum = int(fieldname) - 1
        except ValueError:          # int() failed
            pass
        else:
            # Explicit bounds check: "0" or a negative number would
            # otherwise wrap around and pick a control from the end.
            if 0 <= fieldnum < len(clickies):
                found = clickies[fieldnum]

    # test regexp match
    if found is None:
        regexp = re.compile(fieldname)

        matches = [ ctl for ctl in form.controls
                    if regexp.search(str(ctl.name)) ]

        if matches:
            if unique_match(matches):
                found = matches[0]
            else:
                found_multiple = True   # record for error

    if found is None:
        # try value, for readonly controls like submit keys
        clickies = [ c for c in form.controls
                     if c.value == fieldname and c.readonly ]
        if clickies:
            if len(clickies) == 1:
                found = clickies[0]
            else:
                found_multiple = True   # record for error

    # error out?
    if found is None:
        if not found_multiple:
            raise TwillException('no field matches "%s"' % (fieldname,))
        else:
            raise TwillException('multiple matches to "%s"' % (fieldname,))

    return found
class TwillBrowser(object):
    """
    Wrap mechanize behavior in a simple stateful way.

    Public variables:

      * result -- mechanize-style 'result' object.
    """
    def __init__(self):
        #
        # create special link/forms parsing code to run tidy on HTML first.
        #
        factory = ConfigurableParsingFactory()

        #
        # Create the mechanize browser.
        #
        b = PatchedMechanizeBrowser(history=HistoryStack(), factory=factory)

        self._browser = b

        self.result = None
        self.last_submit_button = None

        #
        # create & set a cookie jar.
        #
        policy = mechanize.DefaultCookiePolicy(rfc2965=True)
        cj = mechanize.LWPCookieJar(policy=policy)
        self._browser.set_cookiejar(cj)
        self.cj = cj

        # Ask for MIME type 'text/html' by preference.
        self._browser.addheaders = [("Accept", "text/html; */*")]

        # ignore robots.txt
        self._browser.set_handle_robots(None)

        # create an HTTP auth handler (goes through the 'creds' property).
        self.creds = mechanize.HTTPPasswordMgr()

        # do handle HTTP-EQUIV properly.
        self._browser.set_handle_equiv(True)

        # callables to be called after each page load.
        self._post_load_hooks = []

    ### get/set HTTP authentication stuff.

    def _set_creds(self, creds):
        # Keep a local reference and hand the manager to mechanize.
        self._creds = creds
        self._browser.set_password_manager(creds)

    def _get_creds(self):
        return self._creds

    creds = property(_get_creds, _set_creds)

    def go(self, url):
        """
        Visit given URL.

        Raises BrowserStateError when none of the candidate URLs could be
        opened.
        """
        try_urls = [ url, ]

        # if this is an absolute URL that is just missing the 'http://' at
        # the beginning, try fixing that.
        if url.find('://') == -1:
            full_url = 'http://%s' % (url,)  # mimic browser behavior
            try_urls.append(full_url)

        # if this is a '?' URL, then assume that we want to tack it onto
        # the end of the current URL.
        if url.startswith('?'):
            current_url = self.get_url()
            current_url = current_url.split('?')[0]
            try_urls = [ current_url + url, ]

        success = False

        for u in try_urls:
            try:
                self._journey('open', u)
                success = True
                break
            except IOError:             # @CTB test this!
                pass

        if success:
            print >> OUT, '==> at', self.get_url()
        else:
            raise BrowserStateError("cannot go to '%s'" % (url,))

    def reload(self):
        """
        Tell the browser to reload the current page.
        """
        self._journey('reload')
        print >> OUT, '==> reloaded'

    def back(self):
        """
        Return to previous page, if possible.
        """
        try:
            self._journey('back')
            print >> OUT, '==> back to', self.get_url()
        except BrowserStateError:
            print >> OUT, '==> back at empty page.'

    def get_code(self):
        """
        Get the HTTP status code received for the current page, or None
        if there is no current result.
        """
        if self.result:
            return self.result.get_http_code()
        return None

    def get_html(self):
        """
        Get the HTML for the current page, or None if there is no current
        result.
        """
        if self.result:
            return self.result.get_page()
        return None

    def get_headers(self):
        """
        Get the headers for the current page, or None if there is no
        current result.
        """
        if self.result:
            return self.result.get_headers()
        return None

    def get_title(self):
        """
        Get content of the HTML title element for the current page.
        """
        return self._browser.title()

    def get_url(self):
        """
        Get the URL of the current page, or None if there is no current
        result.
        """
        if self.result:
            return self.result.get_url()
        return None

    def find_link(self, pattern):
        """
        Find the first link with a URL, link text, or name matching the
        given pattern.  Returns None when nothing matches.
        """

        #
        # first, try to find a link matching that regexp.
        #
        try:
            l = self._browser.find_link(url_regex=pattern)
        except LinkNotFoundError:

            #
            # then, look for a text match.
            #
            try:
                l = self._browser.find_link(text_regex=pattern)
            except LinkNotFoundError:
                #
                # finally, look for a name match.
                #
                try:
                    l = self._browser.find_link(name_regex=pattern)
                except LinkNotFoundError:
                    l = None

        return l

    def follow_link(self, link):
        """
        Follow the given link.
        """
        self._journey('follow_link', link)
        print >> OUT, '==> at', self.get_url()

    def set_agent_string(self, agent):
        """
        Set the agent string to the given value.

        The first existing "User-agent" header (if any) is removed and the
        new one is appended.
        """
        for i, header in enumerate(self._browser.addheaders):
            if header[0] == "User-agent":
                # safe: we stop iterating right after the deletion.
                del self._browser.addheaders[i]
                break

        self._browser.addheaders += [("User-agent", agent)]

    def showforms(self):
        """
        Pretty-print all of the forms.  Include the global form (form
        elements outside of <form> pairs) as forms[0] iff present.
        """
        forms = self.get_all_forms()

        for n, f in enumerate(forms):
            print_form(n, f, OUT)

    def showlinks(self):
        """
        Pretty-print all of the links.
        """
        print >> OUT, 'Links:\n'
        for n, link in enumerate(self._browser.links()):
            print >> OUT, "%d. %s ==> %s" % (n, link.text, link.url,)
        print >> OUT, ''

    def showhistory(self):
        """
        Pretty-print the history of links visited.
        """
        print >> OUT, ''
        print >> OUT, 'History: (%d pages total) ' % (len(self._browser._history))
        n = 1
        for (req, resp) in self._browser._history:
            if req and resp:            # only print those that back() will go
                print >> OUT, "\t%d. %s" % (n, resp.geturl())
                n += 1

        print >> OUT, ''

    def get_all_forms(self):
        """
        Return a list of all of the forms, with global_form at index 0
        iff present.
        """
        global_form = self._browser.global_form()
        forms = list(self._browser.forms())

        if global_form.controls:
            forms.insert(0, global_form)

        return forms

    def get_form(self, formname):
        """
        Return the first form that matches 'formname', or None.

        Tried in order: exact 'id' attribute match, regexp search against
        the form name, then 1-based index into get_all_forms().
        """
        formname = str(formname)

        forms = self.get_all_forms()

        # first try ID
        for f in forms:
            form_id = f.attrs.get("id")
            if form_id and str(form_id) == formname:
                return f

        # next try regexps
        regexp = re.compile(formname)
        for f in forms:
            if f.name and regexp.search(f.name):
                return f

        # ok, try number
        try:
            formnum = int(formname)
            if formnum >= 1 and formnum <= len(forms):
                return forms[formnum - 1]
        except ValueError:              # int() failed
            pass
        except IndexError:              # formnum was incorrect
            pass

        return None

    def get_form_field(self, form, fieldname):
        """
        Return the control that matches 'fieldname'.  Must be
        a *unique* regexp/exact string match.

        Lookup order: exact id match, exact name match (a unique name
        match overrides an id match), 1-based index into form.controls,
        regexp search against control names, and finally exact value
        match among read-only controls.

        Raises TwillException when nothing matches or when the only
        matches found were ambiguous.
        """
        fieldname = str(fieldname)

        found = None
        found_multiple = False

        # test exact match on id.
        matches = [ c for c in form.controls if str(c.id) == fieldname ]
        if matches:
            if unique_match(matches):
                found = matches[0]
            else:
                found_multiple = True   # record for error reporting.

        # test exact match on name.
        matches = [ c for c in form.controls if str(c.name) == fieldname ]
        if matches:
            if unique_match(matches):
                found = matches[0]
            else:
                found_multiple = True   # record for error reporting.

        # test index (1-based).
        if found is None:
            clickies = [ c for c in form.controls ]
            try:
                fieldnum = int(fieldname) - 1
            except ValueError:          # int() failed
                pass
            else:
                # Explicit bounds check: "0" or a negative number would
                # otherwise wrap around and pick a control from the end.
                if 0 <= fieldnum < len(clickies):
                    found = clickies[fieldnum]

        # test regexp match
        if found is None:
            regexp = re.compile(fieldname)

            matches = [ ctl for ctl in form.controls
                        if regexp.search(str(ctl.name)) ]

            if matches:
                if unique_match(matches):
                    found = matches[0]
                else:
                    found_multiple = True   # record for error

        if found is None:
            # try value, for readonly controls like submit keys
            clickies = [ c for c in form.controls
                         if c.value == fieldname and c.readonly ]
            if clickies:
                if len(clickies) == 1:
                    found = clickies[0]
                else:
                    found_multiple = True   # record for error

        # error out?
        if found is None:
            if not found_multiple:
                raise TwillException('no field matches "%s"' % (fieldname,))
            else:
                raise TwillException('multiple matches to "%s"' % (fieldname,))

        return found

    def clicked(self, form, control):
        """
        Record a 'click' in a specific form.
        """
        if self._browser.form != form:
            # construct a function to choose a particular form; select_form
            # can use this to pick out a precise form.

            def choose_this_form(test_form, this_form=form):
                if test_form is this_form:
                    return True

                return False

            self._browser.select_form(predicate=choose_this_form)
            assert self._browser.form == form

            # switching forms forgets any previously clicked button.
            self.last_submit_button = None

        # record the last submit button clicked.
        if isinstance(control, ClientForm.SubmitControl):
            self.last_submit_button = control

    def submit(self, fieldname=None):
        """
        Submit the currently clicked form using the given field.

        If no form has been selected and the page has exactly one form,
        that form is used.  With no 'fieldname', the last submit button
        clicked is used, falling back to the first submit control in the
        form; if there is none, the form is submitted without a button.

        Raises TwillException when the page has no forms, or when no form
        is selected and there is not exactly one to choose from.
        """
        if fieldname is not None:
            fieldname = str(fieldname)

        if not self.get_all_forms():
            raise TwillException("no forms on this page!")

        ctl = None

        form = self._browser.form
        if form is None:
            forms = [ i for i in self.get_all_forms() ]
            if len(forms) == 1:
                form = forms[0]
            else:
                raise TwillException("""\
more than one form; you must select one (use 'fv') before submitting\
""")

        # no fieldname?  see if we can use the last submit button clicked...
        if not fieldname:
            if self.last_submit_button:
                ctl = self.last_submit_button
            else:
                # get first submit button in form.
                submits = [ c for c in form.controls
                            if isinstance(c, ClientForm.SubmitControl) ]

                if len(submits):
                    ctl = submits[0]

        else:
            # fieldname given; find it.
            ctl = self.get_form_field(form, fieldname)

        #
        # now set up the submission by building the request object that
        # will be sent in the form submission.
        #

        if ctl:
            # submit w/button
            print >> OUT, """\
Note: submit is using submit button: name="%s", value="%s"
""" % (ctl.name, ctl.value)

            if isinstance(ctl, ClientForm.ImageControl):
                request = ctl._click(form, (1,1), "", mechanize.Request)
            else:
                request = ctl._click(form, True, "", mechanize.Request)

        else:
            # submit w/o submit button.
            request = form._click(None, None, None, None, 0, None,
                                  "", mechanize.Request)

        #
        # add referer information.  this may require upgrading the
        # request object to have an 'add_unredirected_header' function.
        #

        upgrade = self._browser._ua_handlers.get('_http_request_upgrade')
        if upgrade:
            request = upgrade.http_request(request)
            request = self._browser._add_referer_header(request)

        #
        # now actually GO.
        #

        self._journey('open', request)

    def save_cookies(self, filename):
        """
        Save cookies into the given file.
        """
        self.cj.save(filename, ignore_discard=True, ignore_expires=True)

    def load_cookies(self, filename):
        """
        Load cookies from the given file.
        """
        self.cj.load(filename, ignore_discard=True, ignore_expires=True)

    def clear_cookies(self):
        """
        Delete all of the cookies.
        """
        self.cj.clear()

    def show_cookies(self):
        """
        Pretty-print all of the cookies.
        """
        print >> OUT, '''
There are %d cookie(s) in the cookiejar.
''' % (len(self.cj,))

        if len(self.cj):
            for cookie in self.cj:
                print >> OUT, '\t', cookie

            print >> OUT, ''

    #### private functions.

    def _journey(self, func_name, *args, **kwargs):
        """
        'func_name' should be the name of a mechanize method that either
        returns a 'result' object or raises a HTTPError, e.g.
        one of 'open', 'reload', 'back', or 'follow_link'.

        journey then runs that function with the given arguments and turns
        the results into a nice friendly standard ResultWrapper object, which
        is stored as 'self.result'.

        All exceptions other than HTTPError are unhandled.

        (Idea stolen straight from PBP.)
        """
        # reset
        self.last_submit_button = None
        self.result = None

        func = getattr(self._browser, func_name)
        try:
            r = func(*args, **kwargs)
        except mechanize.HTTPError as e:
            # the error object is used as the response below.
            r = e

        # seek back to 0 if a seek() function is present.
        seek_fn = getattr(r, 'seek', None)
        if seek_fn:
            seek_fn(0)

        # some URLs, like 'file:' URLs, don't have return codes.  In this
        # case, assume success (code=200) if no such attribute.
        code = getattr(r, 'code', 200)

        ## special case refresh loops!?
        if code == 'refresh':
            raise TwillException("""\
infinite refresh loop discovered; aborting.
Try turning off acknowledge_equiv_refresh...""")

        self.result = ResultWrapper(code, r.geturl(), r.read(), r.info())

        #
        # Now call all of the post load hooks with the function name.
        #

        for hook in self._post_load_hooks:
            hook(func_name, *args, **kwargs)
def submit(self, fieldname=None):
    """
    Submit the currently clicked form using the given field.

    If no form has been selected and the page has exactly one form, that
    form is used.  With no 'fieldname', the last submit button clicked is
    used, falling back to the first 'submit' or 'image' input in the
    form; if there is none, the form is submitted without a button.

    POST forms send their values (and any pending file uploads) in the
    request body; all other forms send them as URL query parameters.

    Raises TwillException when the page has no forms, or when no form is
    selected and there is not exactly one to choose from.
    """
    if fieldname is not None:
        fieldname = str(fieldname)

    if len(self.get_all_forms()) == 0:
        raise TwillException("no forms on this page!")

    ctl = None

    form = self._form
    if form is None:
        forms = [ i for i in self.get_all_forms() ]
        if len(forms) == 1:
            form = forms[0]
        else:
            raise TwillException("""\
more than one form; you must select one (use 'fv') before submitting\
""")

    # a form without an explicit action posts back to the current page.
    if form.action is None:
        form.action = self.get_url()

    # no fieldname?  see if we can use the last submit button clicked...
    if fieldname is None:
        if self.last_submit_button is not None:
            ctl = self.last_submit_button
        else:
            # get first submit button in form.
            submits = [ c for c in form.inputs
                        if hasattr(c, 'type') and
                        (c.type == 'submit' or c.type == 'image') ]

            if len(submits) != 0:
                ctl = submits[0]
    else:
        # fieldname given; find it.
        ctl = self.get_form_field(form, fieldname)

    if ctl is not None:
        # submit w/button
        print >> OUT, """\
Note: submit is using submit button: name="%s", value="%s"
""" % (ctl.get("name"), ctl.value)

    # @BRT: For now, the referrer is always the current page
    # @CTB this seems like an issue for further work.
    headers = {'referer' : self.get_url()}

    #
    # now actually GO.
    #
    payload = list(form.form_values())
    if ctl is not None and ctl.get("name") is not None:
        # include the button used for the submission.
        payload.append( (ctl.get("name"), ctl.value) )

    if form.method == 'POST':
        if len(self._formFiles) != 0:
            r = self._session.post(
                form.action,
                data=payload,
                files=self._formFiles,
                headers=headers
            )
        else:
            r = self._session.post(
                form.action,
                data=payload,
                headers=headers
            )
    else:
        # GET submissions carry the form values in the query string;
        # 'data=' would put them in a request body instead.
        r = self._session.get(form.action, params=payload, headers=headers)

    self._formFiles.clear()
    self._history.append(self.result)
    self.result = ResultWrapper(r)
def get_form_field(self, form, fieldname):
    """
    Return the control that matches 'fieldname'.  Must be
    a *unique* regexp/exact string match.

    When 'fieldname' names more than one checkbox input, the checkboxes
    are returned together as an html.CheckboxGroup.  Otherwise the lookup
    order is: exact id match, exact name match (a unique name match
    overrides an id match), 1-based index into form.inputs, regexp search
    against input names, and finally exact value match.

    Raises TwillException when nothing matches or when the only matches
    found were ambiguous.
    """
    if fieldname in form.fields.keys():
        controls = [ f for f in form.inputs
                     if f.get("name") == fieldname
                     and hasattr(f, 'type') and f.type == 'checkbox' ]
        if len(controls) > 1:
            return html.CheckboxGroup(controls)

    fieldname = str(fieldname)

    found = None
    found_multiple = False

    # test exact match on id.
    matches = [ c for c in form.inputs if c.get("id") == fieldname ]
    if matches:
        if unique_match(matches):
            found = matches[0]
        else:
            found_multiple = True   # record for error reporting.

    # test exact match on name.
    matches = [ c for c in form.inputs if str(c.name) == fieldname ]
    if matches:
        if unique_match(matches):
            found = matches[0]
        else:
            found_multiple = True   # record for error reporting.

    # test index (1-based).
    if found is None:
        clickies = [ c for c in form.inputs ]
        try:
            fieldnum = int(fieldname) - 1
        except ValueError:          # int() failed
            pass
        else:
            # Explicit bounds check: "0" or a negative number would
            # otherwise wrap around and pick an input from the end.
            if 0 <= fieldnum < len(clickies):
                found = clickies[fieldnum]

    # test regexp match
    if found is None:
        regexp = re.compile(fieldname)

        matches = [ ctl for ctl in form.inputs
                    if regexp.search(str(ctl.get("name"))) ]

        if matches:
            if unique_match(matches):
                found = matches[0]
            else:
                found_multiple = True   # record for error

    # test exact match on value.
    if found is None:
        clickies = [ c for c in form.inputs if c.value == fieldname ]
        if clickies:
            if len(clickies) == 1:
                found = clickies[0]
            else:
                found_multiple = True   # record for error

    # error out?
    if found is None:
        if not found_multiple:
            raise TwillException('no field matches "%s"' % (fieldname,))
        else:
            raise TwillException('multiple matches to "%s"' % (fieldname,))

    return found
def get_title(self):
    """
    Return the title reported by the current result.

    Raises TwillException when no page has been loaded (self.result is
    None).
    """
    page = self.result
    if page is None:
        raise TwillException("Error: Getting title with no page")
    return page.get_title()
def set_form_control_value(control, val):
    """
    Helper function to deal with setting form values on checkboxes, lists
    etc.

    * single checkbox: 'val' is converted with make_boolean and stored in
      control.checked; a value that cannot be converted leaves the
      checkbox unchanged.
    * html.CheckboxGroup / html.SelectElement: 'val' may be prefixed with
      '+' (select, the default) or '-' (deselect).  For selects, 'val'
      may be either an option's value or its displayed text.
    * anything else with a 'type' other than 'submit': control.value is
      set to 'val'.

    Raises TwillException when 'val' matches no option of a select.
    """
    if hasattr(control, 'type') and control.type == 'checkbox':
        try:
            control.checked = make_boolean(val)
        except TwillException:
            # not a recognisable boolean: nothing is changed.
            pass
        return

    elif isinstance(control, html.CheckboxGroup):
        # figure out if we want to *select* it, or *deselect* it.
        if val.startswith('-'):
            val = val[1:]
            flag = False
        else:
            flag = True
            if val.startswith('+'):
                val = val[1:]

        if flag:
            control.value.add(val)
        else:
            try:
                control.value.remove(val)
            except KeyError:
                pass

    elif isinstance(control, html.SelectElement):
        #
        # for selects we first need to find the right *value*.  Then we
        # need to set it +/-.
        #

        # figure out if we want to *select* it, or if we want to *deselect*
        # it (flag T/F).  By default (no +/-) select...
        if val.startswith('-'):
            val = val[1:]
            flag = False
        else:
            flag = True
            if val.startswith('+'):
                val = val[1:]

        # map displayed option text -> option value.
        options = [ i.strip() for i in control.value_options ]
        # an <option> may have no text at all; treat that as ''.
        optionNames = [ (i.text or '').strip() for i in control.getchildren() ]
        fullOptions = dict(zip(optionNames, options))

        for k, v in fullOptions.items():
            if val != k and val != v:
                continue

            if flag:
                if hasattr(control, 'checkable') and control.checkable:
                    control.checked = flag
                elif control.multiple:
                    control.value.add(v)
                else:
                    # a single-valued select exposes its value as a plain
                    # string, so it is assigned rather than added to.
                    control.value = v
            else:
                if control.multiple:
                    try:
                        control.value.remove(v)
                    except ValueError:
                        pass
                elif control.value == v:
                    # clear the current selection.
                    control.value = None
            return

        raise (TwillException("Attempt to set invalid value"))

    else:
        if (hasattr(control, 'type') and control.type != 'submit'):
            control.value = val