def _parse_action(self):
    """Read the opening ``<form>`` tag and record its attributes.

    Scans ``self.text`` with ``self._tag_re``.  Every opening ``<form>``
    tag met before the first ``</form>`` sets ``self.action``,
    ``self.method``, ``self.id`` and ``self.enctype`` from its attributes;
    attributes that are absent fall back to ``''``, ``'GET'``, ``None`` and
    ``'application/x-www-form-urlencoded'`` respectively.

    Raises:
        AssertionError: if the text contains no ``</form>`` tag, or if
            ``</form>`` is reached before any ``<form>`` tag.
    """
    self.action = None
    for match in self._tag_re.finditer(self.text):
        if match.group(2).lower() != 'form':
            continue
        if match.group(1) == '/':
            # Closing </form>: stop scanning.
            break
        attrs = utils.parse_attrs(match.group(3))
        self.action = attrs.get('action', '')
        self.method = attrs.get('method', 'GET')
        self.id = attrs.get('id')
        self.enctype = attrs.get('enctype',
                                 'application/x-www-form-urlencoded')
    else:
        # Raised explicitly (rather than with ``assert``) so the check is
        # still performed when Python runs with -O.
        raise AssertionError("No </form> tag found")
    if self.action is None:
        raise AssertionError("No <form> tag found")
def _find_element(self, tag, href_attr, href_extract, content, id,
                  href_pattern, html_pattern, index, verbose):
    """Pick one ``<tag ...>...</tag>`` element out of ``self.testbody``.

    Elements lying strictly inside a ``<script>...</script>`` span are
    ignored, as are elements whose ``href_attr`` attribute is missing or
    empty, starts with ``#`` or starts with ``javascript:``.  When
    ``href_extract`` is given, its first group (searched in the attribute
    value) replaces the href; elements it does not match are ignored too.

    The remaining candidates are filtered by the matchers built with
    ``utils.make_pattern`` from ``content`` (element body), ``id`` (the
    ``id`` attribute, ``''`` if absent), ``href_pattern`` (the href) and
    ``html_pattern`` (the whole element).  A falsy matcher disables its
    filter.  With ``verbose`` true, every decision is printed.

    Returns:
        A ``(element_html, element_content, attrs)`` tuple; ``attrs`` also
        carries the final href under the ``'uri'`` key.

    Raises:
        IndexError: if nothing matches, if several elements match while
            ``index`` is None, or if ``index`` is out of range.
    """
    content_pat = utils.make_pattern(content)
    id_pat = utils.make_pattern(id)
    href_pat = utils.make_pattern(href_pattern)
    html_pat = utils.make_pattern(html_pattern)

    body = self.testbody

    element_re = re.compile(r'<%s\s+(.*?)>(.*?)</%s>' % (tag, tag),
                            re.I + re.S)
    script_re = re.compile(r'<script.*?>.*?</script>', re.I | re.S)
    script_spans = [(found.start(), found.end())
                    for found in script_re.finditer(body)]

    def printlog(s):
        if verbose:
            print(s)

    def inside_script(found):
        # Strictly inside: both ends must fall within the script span.
        begin, finish = found.start(), found.end()
        return any(begin > lo and finish < hi for lo, hi in script_spans)

    accepted = []
    candidates = 0
    for found in element_re.finditer(body):
        if inside_script(found):
            continue
        el_html, el_attr, el_content = found.group(0, 1, 2)
        attrs = utils.parse_attrs(el_attr)
        if verbose:
            print('Element: %r' % el_html)

        el_href = attrs.get(href_attr)
        if not el_href:
            printlog(' Skipped: no %s attribute' % href_attr)
            continue
        if href_extract:
            extracted = href_extract.search(el_href)
            if not extracted:
                printlog(" Skipped: doesn't match extract pattern")
                continue
            el_href = extracted.group(1)
        attrs['uri'] = el_href

        if el_href.startswith('#'):
            printlog(' Skipped: only internal fragment href')
            continue
        if el_href.startswith('javascript:'):
            printlog(' Skipped: cannot follow javascript:')
            continue

        # Only elements with a followable href count as "possible" links.
        candidates += 1

        filters = (
            (content_pat, el_content, " Skipped: doesn't match description"),
            (id_pat, attrs.get('id', ''), " Skipped: doesn't match id"),
            (href_pat, el_href, " Skipped: doesn't match href"),
            (html_pat, el_html, " Skipped: doesn't match html"),
        )
        for matcher, subject, reason in filters:
            if matcher and not matcher(subject):
                printlog(reason)
                break
        else:
            printlog(" Accepted")
            accepted.append((el_html, el_content, attrs))

    if not accepted:
        raise IndexError(
            "No matching elements found (from %s possible)" % candidates)

    if index is not None:
        try:
            return accepted[index]
        except IndexError:
            raise IndexError(
                "Only %s (out of %s) links match; index %s out of range"
                % (len(accepted), candidates, index))

    if len(accepted) > 1:
        raise IndexError(
            "Multiple links match: %s"
            % ', '.join(repr(entry[0]) for entry in accepted))
    return accepted[0]
def testparse_attrs(self):
    """Check ``parse_attrs`` against a table of attribute strings."""
    # (attribute text, expected mapping) pairs, checked in order.
    cases = [
        ("href='foo'", {'href': 'foo'}),
        ('href="foo"', {'href': 'foo'}),
        ('href=""', {'href': ''}),
        ('href="foo" id="bar"', {'href': 'foo', 'id': 'bar'}),
        # NOTE(review): identical to the previous row; kept so the set of
        # assertions performed is unchanged.
        ('href="foo" id="bar"', {'href': 'foo', 'id': 'bar'}),
        ("href='foo' id=\"bar\" ", {'href': 'foo', 'id': 'bar'}),
        ("href='foo' id='bar' ", {'href': 'foo', 'id': 'bar'}),
        ("tag='foo\"'", {'tag': 'foo"'}),
        ('value="<>&"{"', {'value': '<>&"{'}),
        ('value="∑"', {'value': '∑'}),
        ('value="€"', {'value': '€'}),
    ]
    for attr_text, expected in cases:
        self.assertEqual(parse_attrs(attr_text), expected)
def call_FUT(self, obj):
    """Run ``webtest.utils.parse_attrs`` on *obj* and return its result.

    The function is imported at call time, inside this method, rather
    than when the test module is loaded.
    """
    from webtest.utils import parse_attrs as function_under_test
    return function_under_test(obj)
def _parse_fields(self):
    """Build the form's fields from the tags found in ``self.text``.

    Walks every tag matched by ``self._tag_re`` and handles ``input``,
    ``select``, ``option``, ``textarea`` and ``button``:

    * each field is created from ``self.FieldClass.classes`` (looked up
      by the lower-cased ``type`` for inputs, by tag name otherwise, with
      ``"multiple_select"`` for a select that has ``multiple``), falling
      back to ``self.FieldClass``;
    * radio inputs sharing a name are merged into a single field whose
      ``options`` list receives ``(value, checked)`` pairs;
    * ``<option>`` tags add ``(value, selected)`` pairs to the open
      select; options outside any select are ignored;
    * a textarea's ``value`` is the unquoted text between its tags.

    On success sets ``self.fields`` (an ``OrderedDict`` mapping each name
    to a list of fields) and ``self.field_order`` (a list of
    ``(name, field)`` pairs in document order).

    Raises:
        AssertionError: on ``</select>`` or ``</textarea>`` without an
            opening tag, on nested selects or textareas, or when a radio
            input reuses the name of a non-radio field.
    """
    in_select = None     # field of the <select> currently open, if any
    in_textarea = None   # (field, offset just past its opening tag)
    fields = OrderedDict()
    field_order = []
    for match in self._tag_re.finditer(self.text):
        end = match.group(1) == '/'
        tag = match.group(2).lower()
        if tag not in ('input', 'select', 'option', 'textarea', 'button'):
            continue

        if end:
            # Validation is raised explicitly (not via ``assert``) so it
            # still runs when Python is started with -O.
            if tag == 'select':
                if not in_select:
                    raise AssertionError(
                        '%r without starting select' % match.group(0))
                in_select = None
            elif tag == 'textarea':
                if not in_textarea:
                    raise AssertionError(
                        "</textarea> with no <textarea> at %s"
                        % match.start())
                opened_field, content_start = in_textarea
                opened_field.value = utils.html_unquote(
                    self.text[content_start:match.start()])
                in_textarea = None
            continue

        if tag == 'option' and in_select is None:
            # An <option> with no open <select> (e.g. one inside a
            # <datalist>) cannot be attached to a field; skip it rather
            # than fail on ``None.options``.
            continue

        attrs = utils.parse_attrs(match.group(3))
        name = attrs.pop('name', None)

        if tag == 'option':
            in_select.options.append((attrs.get('value'),
                                      'selected' in attrs))
            continue

        tag_type = tag
        if tag == 'input':
            tag_type = attrs.get('type', 'text').lower()

        if tag == 'input' and tag_type == 'radio':
            # Compared on the lower-cased type so that type="RADIO" is
            # grouped exactly like type="radio".
            radio_class = self.FieldClass.classes['radio']
            same_name = fields.get(name)
            if not same_name:
                field = radio_class(self, tag, name, match.start(), **attrs)
                fields.setdefault(name, []).append(field)
                field_order.append((name, field))
            else:
                field = same_name[0]
                if not isinstance(field, radio_class):
                    raise AssertionError()
            field.options.append((attrs.get('value'), 'checked' in attrs))
            continue

        if tag_type == "select" and attrs.get("multiple"):
            field_class = self.FieldClass.classes.get("multiple_select",
                                                      self.FieldClass)
        else:
            field_class = self.FieldClass.classes.get(tag_type,
                                                      self.FieldClass)
        field = field_class(self, tag, name, match.start(), **attrs)

        if tag == 'textarea':
            if in_textarea:
                raise AssertionError(
                    "Nested textareas: %r and %r"
                    % (in_textarea, match.group(0)))
            in_textarea = field, match.end()
        elif tag == 'select':
            if in_select:
                raise AssertionError(
                    "Nested selects: %r and %r"
                    % (in_select, match.group(0)))
            in_select = field

        fields.setdefault(name, []).append(field)
        field_order.append((name, field))

    self.field_order = field_order
    self.fields = fields