def test_attachment_parsing(self):
    """A sample <attachment> XML element must parse into the expected dictionary."""
    bugzilla = Bugzilla()
    document = BeautifulSoup(self._example_attachment)
    element = document.find("attachment")
    parsed = bugzilla._parse_attachment_element(
        element, self._expected_example_attachment_parsing['bug_id'])
    self.assertTrue(parsed)
    self._assert_dictionaries_equal(parsed, self._expected_example_attachment_parsing)
def test_status_parsing(self):
    """Each row of the example one-box status table must parse to the expected dict."""
    buildbot = BuildBot()
    soup = BeautifulSoup(self._example_one_box_status)
    status_table = soup.find("table")
    input_rows = status_table.findAll("tr")
    # Idiom fix: iterate with enumerate instead of range(len(...)).
    for x, status_row in enumerate(input_rows):
        expected_parsing = self._expected_example_one_box_parsings[x]
        builder = buildbot._parse_builder_status_from_row(status_row)
        # Make sure we aren't parsing more or less than we expect.
        self.assertEquals(builder.keys(), expected_parsing.keys())
        for key, expected_value in expected_parsing.items():
            self.assertEquals(
                builder[key], expected_value,
                "Builder %d parse failure for key: %s: Actual='%s' Expected='%s'"
                % (x, key, builder[key], expected_value))
def _parse_attachment_ids_request_query(self, page, since=None):
    """Extract attachment ids awaiting review from a Bugzilla request page.

    If |since| (a datetime) is given, only ids whose request timestamp is at
    least that recent are returned; otherwise every id on the page is returned.
    """
    # Idiom fix: raw strings so regex escapes like \d are not seen as
    # (invalid) Python string escapes.
    digits = re.compile(r"\d+")
    attachment_href = re.compile(r"attachment.cgi\?id=\d+&action=review")
    # If no date is given, return all ids.
    if not since:
        attachment_links = SoupStrainer("a", href=attachment_href)
        return [int(digits.search(tag["href"]).group(0))
                for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)]
    # Parse the main table only.
    date_format = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
    mtab = SoupStrainer("table", {"class": "requests"})
    soup = BeautifulSoup(page, parseOnlyThese=mtab)
    patch_ids = []
    for row in soup.findAll("tr"):
        patch_tag = row.find("a", {"href": attachment_href})
        if not patch_tag:
            continue
        patch_id = int(digits.search(patch_tag["href"]).group(0))
        # The matching text node holds the request timestamp.
        date_tag = row.find("td", text=date_format)
        if date_tag and datetime.strptime(date_format.search(date_tag).group(0), "%Y-%m-%d %H:%M") < since:
            _log.info("Patch is old: %d (%s)" % (patch_id, date_tag))
            continue
        patch_ids.append(patch_id)
    return patch_ids
def test_convert_vendor_prefix_js_paths(self):
    """Converting a test must rewrite /common/vendor-prefix.js to a relative path."""
    test_html = """<head>
<script src="/common/vendor-prefix.js">
</head>
"""
    fake_dir_path = self.fake_dir_path('adapterjspaths')
    converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)

    capture = OutputCapture()
    capture.capture_output()
    try:
        converter.feed(test_html)
        converter.close()
        converted = converter.output()
    finally:
        capture.restore_output()

    new_html = BeautifulSoup(converted[1])
    # The original absolute path must be gone...
    orig_path_pattern = re.compile('\"/common/vendor-prefix.js')
    self.assertEquals(len(new_html.findAll(src=orig_path_pattern)), 0,
                      'vendor-prefix.js path was not converted')
    # ...and replaced by a path relative to LayoutTests/resources.
    resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")
    new_relpath = os.path.relpath(resources_dir, fake_dir_path)
    relpath_pattern = re.compile(new_relpath)
    self.assertEquals(len(new_html.findAll(src=relpath_pattern)), 1,
                      'vendor-prefix.js relative path not correct')
def _parse_result_count(self, results_page):
    """Return the number of bugs reported by a Bugzilla search results page.

    Bugzilla renders "Zarro Boogs found" for zero results and "One bug found"
    for a single result; otherwise the count is the leading integer.
    """
    result_count_element = BeautifulSoup(results_page).find(attrs={'class': 'bz_result_count'})
    # Robustness fix: guard against a missing or empty result-count node
    # instead of raising AttributeError on .string/.strip().
    if result_count_element is None or result_count_element.string is None:
        _log.warn("Failed to find bz_result_count in results page")
        return 0
    result_count_parts = result_count_element.string.strip().split(" ")
    if result_count_parts[0] == "Zarro":
        return 0
    if result_count_parts[0] == "One":
        return 1
    return int(result_count_parts[0])
def test_failures_from_fail_row(self):
    """Parse a failing results.html row, then one containing an unrecognized link."""
    fail_row = BeautifulSoup(
        "<tr><td><a>test.hml</a></td><td><a>expected image</a></td><td><a>25%</a></td></tr>")
    test_name = unicode(fail_row.find("a").string)
    # Even if the caller has already found the test name, findAll inside
    # _failures_from_fail_row will see it again.
    failures = OutputCapture().assert_outputs(
        self, ORWTResultsHTMLParser._failures_from_fail_row, [fail_row])
    self.assertEqual(len(failures), 1)
    self.assertEqual(type(sorted(failures)[0]), test_failures.FailureImageHashMismatch)

    fail_row = BeautifulSoup("<tr><td><a>test.hml</a><a>foo</a></td></tr>")
    expected_stderr = "Unhandled link text in results.html parsing: foo. Please file a bug against webkitpy.\n"
    OutputCapture().assert_outputs(
        self, ORWTResultsHTMLParser._failures_from_fail_row, [fail_row],
        expected_stderr=expected_stderr)
def _revisions_for_builder(self, builder):
    """Return (revision, passed) pairs scraped from a builder's status page."""
    soup = BeautifulSoup(self._fetch_builder_page(builder))
    revisions = []
    for row in soup.find('table').findAll('tr'):
        anchor = row.find('a')
        cells = row.findAll('td')
        # A usable row needs at least three cells, the third carrying the
        # build status text.
        if not cells or len(cells) < 3 or not cells[2].string:
            continue
        if anchor and anchor.string and re.match(r'^\d+$', anchor.string):
            revisions.append((int(anchor.string), 'success' in cells[2].string))
    return revisions
def verify_test_harness_paths(self, converter, converted, test_path, num_src_paths, num_href_paths):
    """Assert that testharness references were rewritten from absolute to relative paths."""
    if isinstance(converted, basestring):
        converted = BeautifulSoup(converted)
    resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")

    # The original absolute paths must be gone.
    absolute_pattern = re.compile('\"/resources/testharness')
    self.assertEquals(len(converted.findAll(src=absolute_pattern)), 0,
                      'testharness src path was not converted')
    self.assertEquals(len(converted.findAll(href=absolute_pattern)), 0,
                      'testharness href path was not converted')

    # The expected number of relative paths must now be present.
    new_relpath = os.path.relpath(resources_dir, test_path)
    relative_pattern = re.compile(new_relpath)
    self.assertEquals(len(converted.findAll(src=relative_pattern)), num_src_paths,
                      'testharness src relative path not correct')
    self.assertEquals(len(converted.findAll(href=relative_pattern)), num_href_paths,
                      'testharness href relative path not correct')
def _parse_bugs_from_xml(self, page):
    """Build a Bug object from every <bug> element in an XML page."""
    soup = BeautifulSoup(page)
    bugs = []
    # Without the unicode() call, BeautifulSoup occasionally complains of being
    # passed None for no apparent reason.
    for bug_xml in soup('bug'):
        bugs.append(Bug(self._parse_bug_dictionary_from_xml(unicode(bug_xml)), self))
    return bugs
def _parse_attachment_ids_request_query(self, page):
    """Return the integer attachment id of every review-request link on the page."""
    digits = re.compile("\d+")
    attachment_href = re.compile("attachment.cgi\?id=\d+&action=review")
    review_links = SoupStrainer("a", href=attachment_href)
    ids = []
    for tag in BeautifulSoup(page, parseOnlyThese=review_links):
        ids.append(int(digits.search(tag["href"]).group(0)))
    return ids
def _fetch_bug_ids_advanced_query(self, query):
    """Run an advanced Bugzilla query and return the matching bug ids."""
    soup = BeautifulSoup(self._load_query(query))
    # The contents of the <a> inside the cells in the first column happen
    # to be the bug id.
    bug_ids = []
    for bug_link_cell in soup('td', "first-child"):
        bug_ids.append(int(bug_link_cell.find("a").string))
    return bug_ids
def convert_html(self, new_path, contents, filename):
    """Convert testharness paths and prefixed properties in an HTML document.

    Returns the conversion result when anything changed, otherwise None.
    """
    doc = BeautifulSoup(contents)
    did_modify_paths = self.convert_testharness_paths(doc, new_path, filename)
    converted_properties_and_content = self.convert_prefixed_properties(doc, filename)
    if did_modify_paths or converted_properties_and_content[0]:
        return converted_properties_and_content
    return None
def analyze_test(self, test_contents=None, ref_contents=None):
    """ Analyzes a file to determine if it's a test, what type of test, and what reference
    or support files it requires. Returns all of the test info """
    test_info = None
    # With no contents supplied and no previously loaded doc, there is nothing to analyze.
    if test_contents is None and self.test_doc is None:
        return test_info
    if test_contents is not None:
        self.test_doc = Parser(test_contents)
    if ref_contents is not None:
        self.ref_doc = Parser(ref_contents)

    # First check if it's a reftest
    matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            # FIXME: Is this actually true? We should fix this.
            _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                         self.filesystem.basename(self.filename))
        try:
            ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
        except KeyError as e:
            # FIXME: Figure out what to do w/ invalid test files.
            _log.error('%s has a reference link but is missing the "href"', self.filesystem)
            return None
        # A file that references itself is a reference, not a test.
        if (ref_file == self.filename):
            return {'referencefile': self.filename}
        if self.ref_doc is None:
            # The True argument loads into self.ref_doc rather than self.test_doc.
            self.load_file(ref_file, True)
        test_info = {'test': self.filename, 'reference': ref_file}

        # If the ref file does not live in the same directory as the test file, check it for support files
        test_info['reference_support_info'] = {}
        if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
            reference_support_files = self.support_files(self.ref_doc)
            if len(reference_support_files) > 0:
                reference_relpath = self.filesystem.relpath(self.filesystem.dirname(self.filename),
                                                            self.filesystem.dirname(ref_file)) + self.filesystem.sep
                test_info['reference_support_info'] = {'reference_relpath': reference_relpath,
                                                       'files': reference_support_files}
    # not all reference tests have a <link rel='match'> element in WPT repo
    elif self.is_wpt_reftest():
        test_info = {'test': self.filename, 'reference': self.potential_ref_filename()}
        test_info['reference_support_info'] = {}
    # we check for wpt manual test before checking for jstest, as some WPT manual tests
    # can be classified as CSS JS tests
    elif self.is_wpt_manualtest():
        test_info = {'test': self.filename, 'manualtest': True}
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif '-ref' in self.filename or 'reference' in self.filename:
        test_info = {'referencefile': self.filename}
    elif self.options['all'] is True:
        test_info = {'test': self.filename}
    return test_info
def analyze_test(self, test_contents=None, ref_contents=None):
    """ Analyzes a file to determine if it's a test, what type of test, and what reference
    or support files it requires. Returns all of the test info """
    test_info = None
    # With no contents supplied and no previously loaded doc, there is nothing to analyze.
    if test_contents is None and self.test_doc is None:
        return test_info
    if test_contents is not None:
        self.test_doc = Parser(test_contents)
    if ref_contents is not None:
        self.ref_doc = Parser(ref_contents)

    # First check if it's a reftest
    matches = self.reference_links_of_type(
        'match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            print 'Warning: Webkit does not support multiple references. Importing the first ref defined in ' + self.filesystem.basename(
                self.filename)
        ref_file = self.filesystem.join(
            self.filesystem.dirname(self.filename), matches[0]['href'])
        if self.ref_doc is None:
            self.ref_doc = self.load_file(ref_file)
        test_info = {'test': self.filename, 'reference': ref_file}

        # If the ref file path is relative, we need to check it for
        # relative paths also because when it lands in WebKit, it will be
        # moved down into the test dir.
        #
        # Note: The test files themselves are not checked for support files
        # outside their directories as the convention in the CSSWG is to
        # put all support files in the same dir or subdir as the test.
        #
        # All non-test files in the test's directory tree are normally
        # copied as part of the import as they are assumed to be required
        # support files.
        #
        # *But*, there is exactly one case in the entire css2.1 suite where
        # at test depends on a file that lives in a different directory,
        # which depends on another file that lives outside of its
        # directory. This code covers that case :)
        if matches[0]['href'].startswith('..'):
            support_files = self.support_files(self.ref_doc)
            test_info['refsupport'] = support_files
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif self.options['all'] is True and not (
            '-ref' in self.filename) and not ('reference' in self.filename):
        test_info = {'test': self.filename}
    return test_info
def _parse_bug_id_from_attachment_page(self, page):
    """Return the id of the bug an attachment page belongs to, or None."""
    # The "Up" relation happens to point to the bug.
    up_link = BeautifulSoup(page).find('link', rel='Up')
    if not up_link:
        # This attachment does not exist (or you don't have permissions to
        # view it).
        return None
    href = up_link['href']
    match = re.search("show_bug.cgi\?id=(?P<bug_id>\d+)", href)
    return int(match.group('bug_id'))
def test_failures_from_fail_row(self):
    """Parse a failing results.html row, then one with an unhandled link text."""
    row = BeautifulSoup(
        "<tr><td><a>test.hml</a></td><td><a>expected image</a></td><td><a>25%</a></td></tr>")
    test_name = unicode(row.find("a").string)
    # Even if the caller has already found the test name, findAll inside
    # _failures_from_fail_row will see it again.
    failures = OutputCapture().assert_outputs(
        self, ORWTResultsHTMLParser._failures_from_fail_row, [row])
    self.assertEqual(len(failures), 1)
    self.assertEqual(type(sorted(failures)[0]), test_failures.FailureImageHashMismatch)

    row = BeautifulSoup("<tr><td><a>test.hml</a><a>foo</a></td></tr>")
    expected_stderr = "Unhandled link text in results.html parsing: foo. Please file a bug against webkitpy.\n"
    OutputCapture().assert_outputs(
        self, ORWTResultsHTMLParser._failures_from_fail_row, [row],
        expected_stderr=expected_stderr)
def test_status_parsing(self):
    """Each row of the example one-box status table must parse to the expected dict."""
    buildbot = BuildBot()
    soup = BeautifulSoup(self._example_one_box_status)
    status_table = soup.find("table")
    input_rows = status_table.findAll('tr')
    # Idiom fix: iterate with enumerate instead of range(len(...)).
    for x, status_row in enumerate(input_rows):
        expected_parsing = self._expected_example_one_box_parsings[x]
        builder = buildbot._parse_builder_status_from_row(status_row)
        # Make sure we aren't parsing more or less than we expect.
        self.assertEquals(builder.keys(), expected_parsing.keys())
        for key, expected_value in expected_parsing.items():
            self.assertEquals(builder[key], expected_value,
                              ("Builder %d parse failure for key: %s: Actual='%s' Expected='%s'"
                               % (x, key, builder[key], expected_value)))
def analyze_test(self, test_contents=None, ref_contents=None):
    """Analyzes a file to determine if it's a test, what type of test, and what
    reference or support files it requires.

    Returns:
        A dict which can have the properties:
            "test": test file name.
            "reference": related reference test file name if this is a reference test.
            "reference_support_info": extra information about the related reference
                test and any support files.
            "jstest": A boolean, whether this is a JS test.
        If the given contents are empty, then None is returned.
    """
    test_info = None
    if test_contents is None and self.test_doc is None:
        return test_info
    if test_contents is not None:
        self.test_doc = BeautifulSoup(test_contents)
    if ref_contents is not None:
        self.ref_doc = BeautifulSoup(ref_contents)

    # First check if it's a reftest
    matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            # FIXME: Is this actually true? We should fix this.
            _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                         self.filesystem.basename(self.filename))
        try:
            ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
        except KeyError:
            # FIXME: Figure out what to do w/ invalid test files.
            _log.error('%s has a reference link but is missing the "href"', self.filesystem)
            return None
        if self.ref_doc is None:
            # The True argument loads into self.ref_doc rather than self.test_doc.
            self.load_file(ref_file, True)
        test_info = {'test': self.filename, 'reference': ref_file}

        # If the ref file does not live in the same directory as the test file,
        # check it for support files.
        test_info['reference_support_info'] = {}
        if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
            reference_support_files = self.support_files(self.ref_doc)
            if len(reference_support_files) > 0:
                reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                    self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                test_info['reference_support_info'] = {'reference_relpath': reference_relpath,
                                                       'files': reference_support_files}
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif self.options['all'] and '-ref' not in self.filename and 'reference' not in self.filename:
        test_info = {'test': self.filename}
    return test_info
def _parse_result_count(self, results_page):
    """Return the bug count from a Bugzilla search results page (0 if unparseable)."""
    result_count_text = BeautifulSoup(results_page).find(attrs={'class': 'bz_result_count'})
    if result_count_text is None or result_count_text.string is None:
        _log.warn("BeautifulSoup returned None while finding class: bz_result_count in:\n{}".format(results_page))
        return 0
    count_token = result_count_text.string.strip().split(" ")[0]
    # Bugzilla spells a zero count "Zarro" and a single result "One".
    if count_token == "Zarro":
        return 0
    if count_token == "One":
        return 1
    return int(count_token)
def _parse_bug_id_from_attachment_page(self, page):
    """Extract the owning bug id from an attachment page's title block, or None."""
    # The bug title div links back to the bug itself; the "Up" relation points there.
    title_div = BeautifulSoup(page).find('div', attrs={'id': 'bug_title'})
    if not title_div:
        _log.warning("This attachment does not exist (or you don't have permissions to view it).")
        return None
    bug_id_match = re.search(r"show_bug.cgi\?id=(?P<bug_id>\d+)", str(title_div))
    if not bug_id_match:
        _log.warning("Unable to parse bug id from attachment")
        return None
    return int(bug_id_match.group('bug_id'))
def analyze_test(self, test_contents=None, ref_contents=None):
    """ Analyzes a file to determine if it's a test, what type of test, and what reference
    or support files it requires. Returns all of the test info """
    test_info = None
    # With no contents supplied and no previously loaded doc, there is nothing to analyze.
    if test_contents is None and self.test_doc is None:
        return test_info
    if test_contents is not None:
        self.test_doc = Parser(test_contents)
    if ref_contents is not None:
        self.ref_doc = Parser(ref_contents)

    # First check if it's a reftest
    matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            print 'Warning: Webkit does not support multiple references. Importing the first ref defined in ' + self.filesystem.basename(self.filename)
        ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
        if self.ref_doc is None:
            self.ref_doc = self.load_file(ref_file)
        test_info = {'test': self.filename, 'reference': ref_file}

        # If the ref file path is relative, we need to check it for
        # relative paths also because when it lands in WebKit, it will be
        # moved down into the test dir.
        #
        # Note: The test files themselves are not checked for support files
        # outside their directories as the convention in the CSSWG is to
        # put all support files in the same dir or subdir as the test.
        #
        # All non-test files in the test's directory tree are normally
        # copied as part of the import as they are assumed to be required
        # support files.
        #
        # *But*, there is exactly one case in the entire css2.1 suite where
        # at test depends on a file that lives in a different directory,
        # which depends on another file that lives outside of its
        # directory. This code covers that case :)
        if matches[0]['href'].startswith('..'):
            support_files = self.support_files(self.ref_doc)
            test_info['refsupport'] = support_files
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif self.options['all'] is True and not('-ref' in self.filename) and not('reference' in self.filename):
        test_info = {'test': self.filename}
    return test_info
def _parse_bug_dictionary_from_xml(self, page):
    """Parse a Bugzilla XML bug document into a plain dictionary of fields."""
    document = BeautifulSoup(page)
    bug = {}
    bug["id"] = int(document.find("bug_id").string)
    bug["title"] = self._string_contents(document.find("short_desc"))
    bug["bug_status"] = self._string_contents(document.find("bug_status"))
    # dup_id is only present when the bug was resolved as a duplicate.
    dup_id = document.find("dup_id")
    if dup_id:
        bug["dup_id"] = self._string_contents(dup_id)
    bug["reporter_email"] = self._string_contents(document.find("reporter"))
    bug["assigned_to_email"] = self._string_contents(document.find("assigned_to"))
    cc_emails = []
    for element in document.findAll('cc'):
        cc_emails.append(self._string_contents(element))
    bug["cc_emails"] = cc_emails
    attachments = []
    for element in document.findAll('attachment'):
        attachments.append(self._parse_attachment_element(element, bug["id"]))
    bug["attachments"] = attachments
    return bug
def _parse_mks_response_for_mks_id(response):
    """Return the integer <value> from an MKS response, decoding entities first."""
    # Part of the response may be encoded as HTML entities. Decode them so the
    # text of <value> can be retrieved directly.
    decoded = BeautifulSoup(response, convertEntities=BeautifulSoup.XML_ENTITIES)
    reparsed = BeautifulSoup(decoded.encode("UTF-8"))
    return int(reparsed.find("value").string)
def _parse_bug_page(self, page):
    """Parse a Bugzilla XML bug page into a dictionary of bug fields."""
    document = BeautifulSoup(page)
    bug = {}
    bug["id"] = int(document.find("bug_id").string)
    bug["title"] = self._string_contents(document.find("short_desc"))
    bug["reporter_email"] = self._string_contents(document.find("reporter"))
    bug["assigned_to_email"] = self._string_contents(document.find("assigned_to"))
    cc_elements = document.findAll('cc')
    bug["cc_emails"] = [self._string_contents(element) for element in cc_elements]
    attachment_elements = document.findAll('attachment')
    bug["attachments"] = [self._parse_attachment_element(element, bug["id"]) for element in attachment_elements]
    return bug
def _parse_results_html(cls, page):
    """Map each results.html table title to the list of test names in that table."""
    parsed_results = {}
    for table in BeautifulSoup(page).findAll("table"):
        table_title = unicode(table.findPreviousSibling("p").string)
        if table_title not in cls.expected_keys:
            # This Exception should only ever be hit if run-webkit-tests
            # changes its results.html format.
            raise Exception("Unhandled title: %s" % table_title)
        # We might want to translate table titles into identifiers before storing.
        parsed_results[table_title] = [unicode(row.find("a").string)
                                       for row in table.findAll("tr")]
    return parsed_results
def load_file(self, filename):
    """Parse |filename| into self.test_doc and reset self.ref_doc.

    test_doc is left as None when the path is missing, a directory, or
    cannot be parsed.
    """
    if self.filesystem.isfile(filename):
        try:
            self.test_doc = Parser(self.filesystem.read_binary_file(filename))
        except:
            # FIXME: Figure out what to do if we can't parse the file.
            _log.error("Failed to parse %s", filename)
            # Bug fix: the original wrote "self.test_doc is None" — a no-op
            # comparison — where an assignment was intended, so a stale doc
            # from a previous load survived a parse failure.
            self.test_doc = None
    else:
        if self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)
        self.test_doc = None
    self.ref_doc = None
def user_dict_from_edit_user_page(self, page):
    """Scrape Bugzilla's edit-user page into a {field_name: value} dictionary.

    Group-membership rows are folded into a single "groups" set entry.
    """
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    user_table = soup.find("table", {'class': 'main'})
    user_dict = {}
    for row in user_table('tr'):
        label = row.find('label')
        if not label:
            # This must not be a row we know how to parse.
            continue
        if row.find('table'):
            # Skip the <tr> holding the groups table.
            continue
        key = label['for']
        if "group" not in key:
            user_dict[key] = unicode(row.find('td').string).strip()
            continue
        # We must be parsing a "tr" inside the inner group table.
        key = "groups"
        groups = user_dict.get('groups', set())
        (group_name, _) = self._group_name_and_string_from_row(row)
        if row.find('input', {'type': 'checkbox', 'checked': 'checked'}):
            groups.add(group_name)
        user_dict[key] = groups
    return user_dict
def user_dict_from_edit_user_page(self, page):
    """Scrape Bugzilla's edit-user page into a {field_name: value} dictionary.

    Group-membership rows are folded into a single "groups" set entry.
    """
    soup = BeautifulSoup(page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    user_table = soup.find("table", {'class': 'main'})
    user_dict = {}
    for row in user_table('tr'):
        label = row.find('label')
        if not label:
            # This must not be a row we know how to parse.
            continue
        if row.find('table'):
            # Skip the <tr> holding the groups table.
            continue
        key = label['for']
        if "group" not in key:
            user_dict[key] = unicode(row.find('td').string).strip()
            continue
        # We must be parsing a "tr" inside the inner group table.
        key = "groups"
        groups = user_dict.get('groups', set())
        (group_name, _) = self._group_name_and_string_from_row(row)
        if row.find('input', {'type': 'checkbox', 'checked': 'checked'}):
            groups.add(group_name)
        user_dict[key] = groups
    return user_dict
def user_dict_from_edit_user_page(self, page):
    """Scrape Bugzilla's edit-user page into a {field_name: value} dictionary.

    Group-membership rows are folded into a single "groups" set entry.
    """
    soup = BeautifulSoup(page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    user_table = soup.find("table", {"class": "main"})
    user_dict = {}
    for row in user_table("tr"):
        label = row.find("label")
        if not label:
            # This must not be a row we know how to parse.
            continue
        if row.find("table"):
            # Skip the <tr> holding the groups table.
            continue
        key = label["for"]
        if "group" not in key:
            user_dict[key] = unicode(row.find("td").string).strip()
            continue
        # We must be parsing a "tr" inside the inner group table.
        key = "groups"
        groups = user_dict.get("groups", set())
        (group_name, _) = self._group_name_and_string_from_row(row)
        if row.find("input", {"type": "checkbox", "checked": "checked"}):
            groups.add(group_name)
        user_dict[key] = groups
    return user_dict
def _check_create_bug_response(self, response_html):
    """Return the new bug id from a bug-creation response page.

    Raises an Exception carrying the page's error text when creation failed.
    """
    response_html = string_utils.decode(response_html, target_type=str)
    match = re.search(r'<title>Bug (?P<bug_id>\d+) Submitted[^<]*</title>', response_html)
    if match:
        return match.group('bug_id')

    # Creation failed; try to pull the human-readable error out of the body.
    match = re.search(
        '<div id="bugzilla-body">(?P<error_message>.+)<div id="footer">',
        response_html,
        re.DOTALL)
    error_message = "FAIL"
    if match:
        text_lines = BeautifulSoup(match.group('error_message')).findAll(text=True)
        stripped_lines = [" " + line.strip() for line in text_lines if line.strip()]
        error_message = "\n" + '\n'.join(stripped_lines)
    raise Exception("Bug not created: {}".format(error_message))
def load_file(self, filename):
    """Parse |filename| into self.test_doc and reset self.ref_doc.

    test_doc is left as None when the path is missing, a directory, or
    cannot be parsed.
    """
    if self.filesystem.isfile(filename):
        try:
            self.test_doc = Parser(
                self.filesystem.read_binary_file(filename))
        except:
            # FIXME: Figure out what to do if we can't parse the file.
            _log.error("Failed to parse %s", filename)
            # Bug fix: the original wrote "self.test_doc is None" — a no-op
            # comparison — where an assignment was intended, so a stale doc
            # from a previous load survived a parse failure.
            self.test_doc = None
    else:
        if self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)
        self.test_doc = None
    self.ref_doc = None
def _parse_bug_page(self, page):
    """Parse a Bugzilla XML bug page into a dictionary of bug fields."""
    document = BeautifulSoup(page)
    bug = {}
    bug["id"] = int(document.find("bug_id").string)
    bug["title"] = unicode(document.find("short_desc").string)
    bug["reporter_email"] = str(document.find("reporter").string)
    bug["assigned_to_email"] = str(document.find("assigned_to").string)
    cc_emails = []
    for element in document.findAll('cc'):
        cc_emails.append(str(element.string))
    bug["cc_emails"] = cc_emails
    attachments = []
    for element in document.findAll('attachment'):
        attachments.append(self._parse_attachment_element(element, bug["id"]))
    bug["attachments"] = attachments
    return bug
def load_file(self, filename, is_ref=False):
    """Parse |filename| into self.ref_doc (when is_ref) or self.test_doc.

    The target doc is set to None when the path is missing, a directory,
    unreadable, or unparseable.
    """
    doc = None
    if self.filesystem.isfile(filename):
        try:
            doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
        except IOError:
            _log.error("IOError: Failed to read %s", filename)
            doc = None
        except HTMLParser.HTMLParseError:
            # FIXME: Figure out what to do if we can't parse the file.
            _log.error("HTMLParseError: Failed to parse %s", filename)
            doc = None
    elif self.filesystem.isdir(filename):
        # FIXME: Figure out what is triggering this and what to do about it.
        _log.error("Trying to load %s, which is a directory", filename)

    if is_ref:
        self.ref_doc = doc
    else:
        self.test_doc = doc
def test_convert_test_harness_paths(self):
    """Convert a test that uses all three testharness files and verify the rewrite."""
    test_html = """<head>
<link href="/resources/testharness.css" rel="stylesheet" type="text/css">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
</head>
"""
    converter = W3CTestConverter()
    fake_dir_path = self.fake_dir_path(converter, 'testharnesspaths')
    doc = BeautifulSoup(test_html)

    capture = OutputCapture()
    capture.capture_output()
    try:
        converted = converter.convert_testharness_paths(doc, fake_dir_path, DUMMY_FILENAME)
    finally:
        capture.restore_output()

    self.verify_conversion_happened(converted)
    # Expect two rewritten src paths (the scripts) and one href (the stylesheet).
    self.verify_test_harness_paths(converter, doc, fake_dir_path, 2, 1)
def login_userid_pairs_from_edit_user_results(self, results_page):
    """Return (login, userid) pairs scraped from the edit-user results table."""
    soup = BeautifulSoup(results_page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    results_table = soup.find(id="admin_table")
    pairs = [self._login_and_uid_from_row(row) for row in results_table('tr')]
    # Rows that could not be parsed yield None; drop them.
    return filter(lambda pair: bool(pair), pairs)
def load_file(self, filename):
    """Parse |filename| into self.test_doc (None when missing); reset ref_doc."""
    file_exists = self.filesystem.exists(filename)
    self.test_doc = Parser(self.filesystem.read_text_file(filename)) if file_exists else None
    self.ref_doc = None
class TestParser(object):
    """Parses a W3C test file to determine what kind of test it is and what
    reference or support files it depends on."""

    def __init__(self, options, filename):
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem
        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse |filename| and store the doc on test_doc (or ref_doc when is_ref).

        Returns the parsed doc, or None when the path is missing, a directory,
        or cannot be parsed. The |is_ref| parameter is new but defaults to the
        original behavior, so existing callers are unaffected.
        """
        doc = None
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                # Bug fix: the original wrote "self.test_doc is None", a no-op
                # comparison, where an assignment to None was intended.
                doc = None
        elif self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc
            self.ref_doc = None
        return doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what
        reference or support files it requires. Returns all of the test info """
        test_info = None
        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest.
        matches = self.reference_links_of_type("match") + self.reference_links_of_type("mismatch")
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    "Multiple references are not supported. Importing the first ref defined in %s",
                    self.filesystem.basename(self.filename),
                )
            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]["href"])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None
            if self.ref_doc is None:
                # Bug fix: load the reference into ref_doc. The original called
                # load_file(ref_file), which overwrote test_doc with the
                # reference contents and always left ref_doc set to None.
                self.ref_doc = self.load_file(ref_file, is_ref=True)
            test_info = {"test": self.filename, "reference": ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]["href"].startswith(".."):
                support_files = self.support_files(self.ref_doc)
                test_info["refsupport"] = support_files
        elif self.is_jstest():
            test_info = {"test": self.filename, "jstest": True}
        elif self.options["all"] is True and not ("-ref" in self.filename) and not ("reference" in self.filename):
            test_info = {"test": self.filename}
        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return every element whose rel attribute equals |reftest_type|."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage
        of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile("['\"/]?/resources/testharness")))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes."""
        support_files = []
        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile(".*"))
        elements_with_href_attributes = doc.findAll(href=re.compile(".*"))

        # Collect paths referenced through CSS url(...) notation.
        url_pattern = re.compile(r"url\(.*\)")
        urls = []
        for url in doc.findAll(text=url_pattern):
            url = re.search(url_pattern, url)
            url = re.sub(r"url\(['\"]?", "", url.group(0))
            url = re.sub(r"['\"]?\)", "", url)
            urls.append(url)

        src_paths = [src_tag["src"] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag["href"] for href_tag in elements_with_href_attributes]
        paths = src_paths + href_paths + urls
        for path in paths:
            # External references are not local support files.
            if not (path.startswith("http:")) and not (path.startswith("mailto:")):
                support_files.append(path)
        return support_files
class TestParser(object):
    """Parses a W3C test file to determine what kind of test it is (reftest,
    jstest, manual test) and which reference/support files it requires.

    Documents are parsed with BeautifulSoup; `test_doc` holds the test file's
    DOM and `ref_doc` the reference file's DOM (either may be None).
    """

    def __init__(self, filename, host):
        self.filename = filename
        self.host = host
        self.filesystem = self.host.filesystem
        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse |filename| into self.ref_doc (if is_ref) or self.test_doc.

        Stores None when the file is missing, unreadable, unparsable, or a
        directory.
        """
        if self.filesystem.isfile(filename):
            try:
                doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
            except IOError:
                _log.error("IOError: Failed to read %s", filename)
                doc = None
            except HTMLParser.HTMLParseError:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("HTMLParseError: Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None
        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and
        what reference or support files it requires.

        Returns:
            A dict which can have the properties:
            "test": test file name.
            "reference": related reference test file name if this is a
                reference test.
            "reference_support_info": extra information about the related
                reference test and any support files.
            "jstest": A boolean, whether this is a JS test.
            If the path doesn't look like a test or the given contents are
            empty, then None is returned.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = BeautifulSoup(test_contents)
        if ref_contents is not None:
            self.ref_doc = BeautifulSoup(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files.
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                        self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif 'csswg-test' in self.filename:
            # In csswg-test, all other files should be manual tests.
            # This function isn't called for non-test files in support/.
            test_info = {'test': self.filename}
        elif '-manual.' in self.filesystem.basename(self.filename):
            # WPT has a naming convention for manual tests.
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return every <link rel=|reftest_type|> element in the test doc."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """Searches the file for all paths specified in url()s or src attributes.

        Paths with an explicit URI scheme (http:, mailto:, data:, ...) are
        excluded; only relative support-file paths are returned.
        """
        support_files = []
        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            # Extract the path inside url(...), stripping optional quotes.
            url = re.search(url_pattern, url)
            url = re.sub(r'url\([\'\"]?', '', url.group(0))
            url = re.sub(r'[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        paths = src_paths + href_paths + urls
        for path in paths:
            if not path.startswith('http:') and not path.startswith('mailto:'):
                uri_scheme_pattern = re.compile(r'[A-Za-z][A-Za-z+.-]*:')
                if not uri_scheme_pattern.match(path):
                    support_files.append(path)

        return support_files
def builder_statuses(self):
    """Fetch the one-box-per-builder status page and return one parsed
    status dict per table row."""
    page = self._fetch_one_box_per_builder()
    status_table = BeautifulSoup(page).find('table')
    statuses = []
    for row in status_table.findAll('tr'):
        statuses.append(self._parse_builder_status_from_row(row))
    return statuses
def _parse_bug_title_from_attachment_page(self, page):
    """Return the <div id="bug_title"> element of an attachment page, or
    None if the page has no such element."""
    soup = BeautifulSoup(page)
    return soup.find('div', attrs={'id': 'bug_title'})
def _parse_logins_from_editusers_results(self, results_page):
    """Parse the Bugzilla editusers results table and return the list of
    login names.

    Rows for which _login_from_row yields a falsy value (header rows,
    malformed rows) are dropped.
    """
    soup = BeautifulSoup(results_page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    results_table = soup.find(id="admin_table")
    logins = [self._login_from_row(row) for row in results_table('tr')]
    # The original `filter(lambda login: bool(login), logins)` wrapped the
    # identity truth test in a redundant lambda and returns an iterator on
    # Python 3; a comprehension is clearer and always yields a list.
    return [login for login in logins if login]
class TestParser(object):
    """Parses a W3C/WPT test file to determine its type (reftest, jstest,
    manual test, reference file) and the reference/support files it needs."""

    # NOTE(review): the default `host=Host()` is evaluated once at function
    # definition time, so every TestParser constructed without an explicit
    # host shares the same Host instance — confirm this sharing is intended.
    def __init__(self, options, filename, host=Host()):
        self.options = options
        self.filename = filename
        self.host = host
        self.filesystem = self.host.filesystem
        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse |filename| into self.ref_doc (if is_ref) or self.test_doc;
        stores None on failure."""
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            # NOTE(review): bare except also swallows KeyboardInterrupt and
            # SystemExit; narrowing it would be safer.
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None
        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test,
        and what reference or support files it requires.

        Returns a dict of test info ('test', 'reference',
        'reference_support_info', 'jstest', 'manualtest', 'referencefile',
        'slow' keys as applicable), or None if the file is not a test.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            # NOTE(review): `e` is bound but never used.
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            # A self-referencing "match" link marks the file as a reference, not a test.
            if (ref_file == self.filename):
                return {'referencefile': self.filename}

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}
        # not all reference tests have a <link rel='match'> element in WPT repo
        elif self.is_wpt_reftest():
            test_info = {'test': self.filename, 'reference': self.potential_ref_filename()}
            test_info['reference_support_info'] = {}
        # we check for wpt manual test before checking for jstest, as some WPT manual tests can be classified as CSS JS tests
        elif self.is_wpt_manualtest():
            test_info = {'test': self.filename, 'manualtest': True}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif '-ref' in self.filename or 'reference' in self.filename:
            test_info = {'referencefile': self.filename}
        elif self.options['all'] is True:
            test_info = {'test': self.filename}

        if test_info and self.is_slow_test():
            test_info['slow'] = True

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return every <link rel=|reftest_type|> element in the test doc."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def is_wpt_manualtest(self):
        """Returns whether the test is a manual test according WPT rules (i.e. file ends with -manual.htm path)."""
        return self.filename.endswith('-manual.htm') or self.filename.endswith('-manual.html')

    def is_slow_test(self):
        """Return True if a <meta name="timeout" content="long"> element is present."""
        return any([match.name == 'meta' and match['name'] == 'timeout' for match in self.test_doc.findAll(content='long')])

    def potential_ref_filename(self):
        """Return the '-ref' counterpart filename for this test."""
        parts = self.filesystem.splitext(self.filename)
        return parts[0] + '-ref' + parts[1]

    def is_wpt_reftest(self):
        """Returns whether the test is a ref test according WPT rules (i.e. file has a -ref.html counterpart)."""
        # NOTE(review): `parts` is computed but never used.
        parts = self.filesystem.splitext(self.filename)
        return self.filesystem.isfile(self.potential_ref_filename())

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes."""
        support_files = []
        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            # Extract the path inside url(...), stripping optional quotes.
            url = re.search(url_pattern, url)
            url = re.sub('url\([\'\"]?', '', url.group(0))
            url = re.sub('[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        paths = src_paths + href_paths + urls
        for path in paths:
            # Skip anything with an explicit URI scheme (http:, data:, ...).
            uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
def _parse_quips(self, page):
    """Extract the list of existing quip strings from Bugzilla's quips page."""
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    marker = soup.find(text=re.compile(r"Existing quips:"))
    quip_items = marker.findNext("ul").findAll("li")
    quips = []
    for item in quip_items:
        quips.append(unicode(item.string))
    return quips
class TestParser(object):
    """Parses a W3C test file to determine its type (reftest, jstest) and
    the reference/support files it requires."""

    def __init__(self, options, filename):
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem
        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse |filename| into self.ref_doc (if is_ref) or self.test_doc;
        stores None on failure."""
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            # NOTE(review): bare except also swallows KeyboardInterrupt and
            # SystemExit; narrowing it would be safer.
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None
        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test,
        and what reference or support files it requires.

        Returns a dict of test info ('test', 'reference',
        'reference_support_info', 'jstest' keys as applicable), or None if
        the file is not a test.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type(
            'match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    'Multiple references are not supported. Importing the first ref defined in %s',
                    self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(
                    self.filesystem.dirname(self.filename), matches[0]['href'])
            # NOTE(review): `e` is bound but never used.
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(
                    self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(
                            ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files
                    }
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and not (
                '-ref' in self.filename) and not ('reference' in self.filename):
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return every <link rel=|reftest_type|> element in the test doc."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(
            self.test_doc.find(
                src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s or src attributes."""
        support_files = []
        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            # Extract the path inside url(...), stripping optional quotes.
            url = re.search(url_pattern, url)
            url = re.sub('url\([\'\"]?', '', url.group(0))
            url = re.sub('[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [
            src_tag['src'] for src_tag in elements_with_src_attributes
        ]
        href_paths = [
            href_tag['href'] for href_tag in elements_with_href_attributes
        ]

        paths = src_paths + href_paths + urls
        for path in paths:
            if not (path.startswith('http:')) and not (
                    path.startswith('mailto:')):
                # Skip anything else with an explicit URI scheme too.
                uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
                if not uri_scheme_pattern.match(path):
                    support_files.append(path)

        return support_files
class TestParser(object): def __init__(self, options, filename): self.options = options self.filename = filename self.host = Host() self.filesystem = self.host.filesystem self.test_doc = None self.ref_doc = None self.load_file(filename) def load_file(self, filename): if self.filesystem.exists(filename): self.test_doc = Parser(self.filesystem.read_text_file(filename)) else: self.test_doc = None self.ref_doc = None def analyze_test(self, test_contents=None, ref_contents=None): """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """ test_info = None if test_contents is None and self.test_doc is None: return test_info if test_contents is not None: self.test_doc = Parser(test_contents) if ref_contents is not None: self.ref_doc = Parser(ref_contents) # First check if it's a reftest matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch') if matches: if len(matches) > 1: print 'Warning: Webkit does not support multiple references. Importing the first ref defined in ' + self.filesystem.basename(self.filename) ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href']) if self.ref_doc is None: self.ref_doc = self.load_file(ref_file) test_info = {'test': self.filename, 'reference': ref_file} # If the ref file path is relative, we need to check it for # relative paths also because when it lands in WebKit, it will be # moved down into the test dir. # # Note: The test files themselves are not checked for support files # outside their directories as the convention in the CSSWG is to # put all support files in the same dir or subdir as the test. # # All non-test files in the test's directory tree are normally # copied as part of the import as they are assumed to be required # support files. 
# # *But*, there is exactly one case in the entire css2.1 suite where # at test depends on a file that lives in a different directory, # which depends on another file that lives outside of its # directory. This code covers that case :) if matches[0]['href'].startswith('..'): support_files = self.support_files(self.ref_doc) test_info['refsupport'] = support_files elif self.is_jstest(): test_info = {'test': self.filename, 'jstest': True} elif self.options['all'] is True and not('-ref' in self.filename) and not('reference' in self.filename): test_info = {'test': self.filename} return test_info def reference_links_of_type(self, reftest_type): return self.test_doc.findAll(rel=reftest_type) def is_jstest(self): """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths.""" return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness'))) def support_files(self, doc): """ Searches the file for all paths specified in url()'s, href or src attributes.""" support_files = [] if doc is None: return support_files elements_with_src_attributes = doc.findAll(src=re.compile('.*')) elements_with_href_attributes = doc.findAll(href=re.compile('.*')) url_pattern = re.compile('url\(.*\)') urls = [] for url in doc.findAll(text=url_pattern): url = re.search(url_pattern, url) url = re.sub('url\([\'\"]', '', url.group(0)) url = re.sub('[\'\"]\)', '', url) urls.append(url) src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes] href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes] paths = src_paths + href_paths + urls for path in paths: if not(path.startswith('http:')) and not(path.startswith('mailto:')): support_files.append(path) return support_files
def analyze_test(self, test_contents=None, ref_contents=None):
    """Analyzes a file to determine if it's a test, what type of test, and
    what reference or support files it requires.

    Returns a dict of test info ('test', 'reference',
    'reference_support_info', 'jstest' keys as applicable), or None if the
    file is not a test.
    """
    test_info = None

    if test_contents is None and self.test_doc is None:
        return test_info
    if test_contents is not None:
        self.test_doc = Parser(test_contents)
    if ref_contents is not None:
        self.ref_doc = Parser(ref_contents)

    # First check if it's a reftest
    matches = self.reference_links_of_type(
        'match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            # FIXME: Is this actually true? We should fix this.
            _log.warning(
                'Multiple references are not supported. Importing the first ref defined in %s',
                self.filesystem.basename(self.filename))

        try:
            ref_file = self.filesystem.join(
                self.filesystem.dirname(self.filename), matches[0]['href'])
        # NOTE(review): `e` is bound but never used.
        except KeyError as e:
            # FIXME: Figure out what to do w/ invalid test files.
            _log.error('%s has a reference link but is missing the "href"', self.filesystem)
            return None

        if self.ref_doc is None:
            self.load_file(ref_file, True)

        test_info = {'test': self.filename, 'reference': ref_file}

        # If the ref file does not live in the same directory as the test file, check it for support files
        test_info['reference_support_info'] = {}
        if self.filesystem.dirname(ref_file) != self.filesystem.dirname(
                self.filename):
            reference_support_files = self.support_files(self.ref_doc)
            if len(reference_support_files) > 0:
                reference_relpath = self.filesystem.relpath(
                    self.filesystem.dirname(self.filename),
                    self.filesystem.dirname(
                        ref_file)) + self.filesystem.sep
                test_info['reference_support_info'] = {
                    'reference_relpath': reference_relpath,
                    'files': reference_support_files
                }
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif self.options['all'] is True and not (
            '-ref' in self.filename) and not ('reference' in self.filename):
        test_info = {'test': self.filename}

    return test_info
class TestParser(object):
    """Parses a W3C test file to determine its type (reftest, jstest) and
    the reference/support files it requires."""

    def __init__(self, options, filename):
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem
        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse |filename| into self.ref_doc (if is_ref) or self.test_doc;
        stores None on failure."""
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            # NOTE(review): bare except also swallows KeyboardInterrupt and
            # SystemExit; narrowing it would be safer.
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None
        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test,
        and what reference or support files it requires.

        Returns a dict of test info ('test', 'reference',
        'reference_support_info', 'jstest' keys as applicable), or None if
        the file is not a test.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            # NOTE(review): `e` is bound but never used.
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                        self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and not('-ref' in self.filename) and not('reference' in self.filename):
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return every <link rel=|reftest_type|> element in the test doc."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s or src attributes."""
        support_files = []
        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            # Extract the path inside url(...), stripping optional quotes.
            url = re.search(url_pattern, url)
            url = re.sub('url\([\'\"]?', '', url.group(0))
            url = re.sub('[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        paths = src_paths + href_paths + urls
        for path in paths:
            if not(path.startswith('http:')) and not(path.startswith('mailto:')):
                # Skip anything else with an explicit URI scheme too.
                uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
                if not uri_scheme_pattern.match(path):
                    support_files.append(path)

        return support_files
def _parse_twisted_directory_listing(self, page):
    """Parse a twisted web server directory listing page into one parsed
    entry per file/directory row."""
    # HACK: Match only table rows with a class to ignore twisted header/footer rows.
    row_class = re.compile(r'\b(?:directory|file)\b')
    listing_table = BeautifulSoup(page).find('table')
    parsed_rows = []
    for file_row in listing_table.findAll('tr', {'class': row_class}):
        parsed_rows.append(self._parse_twisted_file_row(file_row))
    return parsed_rows
def parse_results_html(cls, page):
    """Parse every <table> in |page| and return the concatenated results
    of _parse_results_table over all tables."""
    tables = BeautifulSoup(page).findAll("table")
    # Flatten with a comprehension instead of sum(lists, []), which copies
    # the accumulated list on every addition (quadratic in total rows).
    return [result for table in tables for result in cls._parse_results_table(table)]
class TestParser(object):
    """Parses a W3C/WPT test file to determine its type (reftest, jstest,
    manual test, reference file) and the reference/support files it needs.

    source_root_directory is used to resolve absolute ('/'-prefixed) ref
    hrefs against the repository root.
    """

    # NOTE(review): the default `host=Host()` is evaluated once at function
    # definition time, so every TestParser constructed without an explicit
    # host shares the same Host instance — confirm this sharing is intended.
    def __init__(self, options, filename, host=Host(), source_root_directory=None):
        self.options = options
        self.filename = filename
        self.host = host
        self.filesystem = self.host.filesystem
        self.source_root_directory = source_root_directory
        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse |filename| into self.ref_doc (if is_ref) or self.test_doc;
        stores None on failure."""
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            # NOTE(review): bare except also swallows KeyboardInterrupt and
            # SystemExit; narrowing it would be safer.
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None
        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test,
        and what reference or support files it requires.

        Returns a dict of test info ('test', 'reference',
        'reference_support_info', 'jstest', 'manualtest', 'referencefile',
        'slow' keys as applicable), or None if the file is not a test.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type(
            'match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    'Multiple references are not supported. Importing the first ref defined in %s',
                    self.filesystem.basename(self.filename))

            try:
                href_match_file = matches[0]['href'].strip()
                # Absolute hrefs are resolved against the repository root;
                # relative hrefs against the test's own directory.
                if href_match_file.startswith('/'):
                    ref_file = self.filesystem.join(
                        self.source_root_directory, href_match_file.lstrip('/'))
                else:
                    ref_file = self.filesystem.join(
                        self.filesystem.dirname(self.filename), href_match_file)
            # NOTE(review): `e` is bound but never used.
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            # A self-referencing "match" link marks the file as a reference, not a test.
            if (ref_file == self.filename):
                return {'referencefile': self.filename}

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(
                    self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(
                            ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files
                    }
        # not all reference tests have a <link rel='match'> element in WPT repo
        elif self.is_wpt_reftest():
            test_info = {
                'test': self.filename,
                'reference': self.potential_ref_filename()
            }
            test_info['reference_support_info'] = {}
        # we check for wpt manual test before checking for jstest, as some WPT manual tests can be classified as CSS JS tests
        elif self.is_wpt_manualtest():
            test_info = {'test': self.filename, 'manualtest': True}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif '-ref' in self.filename or 'reference' in self.filename:
            test_info = {'referencefile': self.filename}
        elif self.options['all'] is True:
            test_info = {'test': self.filename}

        if test_info and self.is_slow_test():
            test_info['slow'] = True

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return every <link rel=|reftest_type|> element in the test doc."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(
            self.test_doc.find(
                src=re.compile('[\'\"/]?/resources/testharness')))

    def is_wpt_manualtest(self):
        """Returns whether the test is a manual test according WPT rules."""
        # General rule for manual test i.e. file ends with -manual.htm path
        # See https://web-platform-tests.org/writing-tests/manual.html#requirements-for-a-manual-test
        if self.filename.find('-manual.') != -1:
            return True

        # Rule specific to CSS WG manual tests i.e. rely on <meta name="flags">
        # See https://web-platform-tests.org/writing-tests/css-metadata.html#requirement-flags
        # For further details and discussions, see the following links:
        # https://github.com/web-platform-tests/wpt/issues/5381
        # https://github.com/web-platform-tests/wpt/issues/5293
        for match in self.test_doc.findAll(name='meta', attrs={
                'name': 'flags',
                'content': True
        }):
            css_flags = set(match['content'].split())
            if bool(
                    css_flags & {
                        "animated", "font", "history", "interact", "paged",
                        "speech", "userstyle"
                    }):
                return True

        return False

    def is_slow_test(self):
        """Return True if a <meta name="timeout" content="long"> element is present."""
        return any([
            match.name == 'meta' and match['name'] == 'timeout'
            for match in self.test_doc.findAll(content='long')
        ])

    def potential_ref_filename(self):
        """Return the '-ref' counterpart filename for this test."""
        parts = self.filesystem.splitext(self.filename)
        return parts[0] + '-ref' + parts[1]

    def is_wpt_reftest(self):
        """Returns whether the test is a ref test according WPT rules (i.e.
        file has a -ref.html counterpart)."""
        # NOTE(review): `parts` is computed but never used.
        parts = self.filesystem.splitext(self.filename)
        return self.filesystem.isfile(self.potential_ref_filename())

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes."""
        support_files = []
        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            # A single text node may contain several url(...) occurrences;
            # the inner loop deliberately rebinds `url` to each match.
            for url in re.findall(url_pattern, url):
                url = re.sub('url\([\'\"]?', '', url)
                url = re.sub('[\'\"]?\)', '', url)
                urls.append(url)

        src_paths = [
            src_tag['src'] for src_tag in elements_with_src_attributes
        ]
        href_paths = [
            href_tag['href'] for href_tag in elements_with_href_attributes
        ]

        paths = src_paths + href_paths + urls
        for path in paths:
            # Skip anything with an explicit URI scheme (http:, data:, ...).
            uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
def _parse_quips(self, page):
    """Return the list of existing quips scraped from Bugzilla's quips page."""
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    heading = soup.find(text=re.compile(r"Existing quips:"))
    return [unicode(entry.string) for entry in heading.findNext("ul").findAll("li")]
def analyze_test(self, test_contents=None, ref_contents=None):
    """Analyzes a file to determine if it's a test, what type of test, and
    what reference or support files it requires.

    Args:
        test_contents: Optional raw contents of the test file; when given,
            it is parsed and replaces self.test_doc.
        ref_contents: Optional raw contents of the reference file; when
            given, it is parsed and replaces self.ref_doc.

    Returns:
        A dict with the test info ('test', 'reference', 'jstest',
        'manualtest', 'referencefile', 'reference_support_info', 'slow'
        keys as applicable), or None for an invalid reference link.
    """
    test_info = None
    if test_contents is None and self.test_doc is None:
        return test_info
    if test_contents is not None:
        self.test_doc = Parser(test_contents)
    if ref_contents is not None:
        self.ref_doc = Parser(ref_contents)

    # First check if it's a reftest
    matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            # FIXME: Is this actually true? We should fix this.
            _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                         self.filesystem.basename(self.filename))
        try:
            href_match_file = matches[0]['href'].strip()
            if href_match_file.startswith('/'):
                # Root-relative refs resolve against the source root.
                ref_file = self.filesystem.join(self.source_root_directory, href_match_file.lstrip('/'))
            else:
                # Other refs resolve against the test's own directory.
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), href_match_file)
        except KeyError:
            # FIXME: Figure out what to do w/ invalid test files.
            # Log the offending file name (previously logged the
            # filesystem object, which is not useful).
            _log.error('%s has a reference link but is missing the "href"', self.filename)
            return None
        if ref_file == self.filename:
            # A file that references itself is itself a reference file.
            return {'referencefile': self.filename}
        if self.ref_doc is None:
            self.load_file(ref_file, True)
        test_info = {'test': self.filename, 'reference': ref_file}

        # If the ref file does not live in the same directory as the test
        # file, check it for support files.
        test_info['reference_support_info'] = {}
        if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
            reference_support_files = self.support_files(self.ref_doc)
            if reference_support_files:
                reference_relpath = self.filesystem.relpath(
                    self.filesystem.dirname(self.filename),
                    self.filesystem.dirname(ref_file)) + self.filesystem.sep
                test_info['reference_support_info'] = {
                    'reference_relpath': reference_relpath,
                    'files': reference_support_files
                }
    # Not all reference tests have a <link rel='match'> element in WPT repo.
    elif self.is_wpt_reftest():
        test_info = {
            'test': self.filename,
            'reference': self.potential_ref_filename()
        }
        test_info['reference_support_info'] = {}
    # We check for wpt manual test before checking for jstest, as some WPT
    # manual tests can be classified as CSS JS tests.
    elif self.is_wpt_manualtest():
        test_info = {'test': self.filename, 'manualtest': True}
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif '-ref' in self.filename or 'reference' in self.filename:
        test_info = {'referencefile': self.filename}
    elif self.options['all'] is True:
        test_info = {'test': self.filename}

    if test_info and self.is_slow_test():
        test_info['slow'] = True
    return test_info
def _group_rows_from_edit_user_page(self, edit_user_page):
    """Return the <td class="groupname"> cells from an edit-user page."""
    document = BeautifulSoup(edit_user_page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    group_name_cells = document('td', {'class': 'groupname'})
    return group_name_cells
def login_userid_pairs_from_edit_user_results(self, results_page):
    """Parse the admin user-search results table into (login, userid)
    pairs, dropping rows that do not parse to a pair."""
    document = BeautifulSoup(results_page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    admin_table = document.find(id="admin_table")
    pairs = []
    for row in admin_table('tr'):
        pair = self._login_and_uid_from_row(row)
        # Header/malformed rows yield a falsy value; skip them.
        if pair:
            pairs.append(pair)
    return pairs
def analyze_test(self, test_contents=None, ref_contents=None):
    """Analyzes a file to determine if it's a test, what type of test, and
    what reference or support files it requires.

    Args:
        test_contents: Optional raw contents of the test file; when given,
            it is parsed and replaces self.test_doc.
        ref_contents: Optional raw contents of the reference file; when
            given, it is parsed and replaces self.ref_doc.

    Returns:
        A dict with the test info ('test', 'reference', 'refsupport',
        'jstest' keys as applicable), or None for an invalid reference
        link, or the initial None if the file is not a test.
    """
    test_info = None
    if test_contents is None and self.test_doc is None:
        return test_info
    if test_contents is not None:
        self.test_doc = Parser(test_contents)
    if ref_contents is not None:
        self.ref_doc = Parser(ref_contents)

    # First check if it's a reftest
    matches = self.reference_links_of_type("match") + self.reference_links_of_type("mismatch")
    if matches:
        if len(matches) > 1:
            # FIXME: Is this actually true? We should fix this.
            _log.warning(
                "Multiple references are not supported. Importing the first ref defined in %s",
                self.filesystem.basename(self.filename),
            )
        try:
            ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]["href"])
        except KeyError:
            # FIXME: Figure out what to do w/ invalid test files.
            # Log the offending file name (previously logged the
            # filesystem object, which is not useful).
            _log.error('%s has a reference link but is missing the "href"', self.filename)
            return None
        if self.ref_doc is None:
            self.ref_doc = self.load_file(ref_file)
        test_info = {"test": self.filename, "reference": ref_file}

        # If the ref file path is relative, we need to check it for
        # relative paths also because when it lands in WebKit, it will be
        # moved down into the test dir.
        #
        # Note: The test files themselves are not checked for support files
        # outside their directories as the convention in the CSSWG is to
        # put all support files in the same dir or subdir as the test.
        #
        # All non-test files in the test's directory tree are normally
        # copied as part of the import as they are assumed to be required
        # support files.
        #
        # *But*, there is exactly one case in the entire css2.1 suite where
        # a test depends on a file that lives in a different directory,
        # which depends on another file that lives outside of its
        # directory. This code covers that case :)
        if matches[0]["href"].startswith(".."):
            support_files = self.support_files(self.ref_doc)
            test_info["refsupport"] = support_files
    elif self.is_jstest():
        test_info = {"test": self.filename, "jstest": True}
    elif self.options["all"] is True and "-ref" not in self.filename and "reference" not in self.filename:
        test_info = {"test": self.filename}
    return test_info
def _parse_bug_page(self, page):
    """Parse Bugzilla's XML bug page into a plain dict of bug fields."""
    document = BeautifulSoup(page)

    def first_tag_text(tag_name):
        # Single-valued fields are the string contents of the first match.
        return self._string_contents(document.find(tag_name))

    bug_id = int(document.find("bug_id").string)
    bug = {
        "id": bug_id,
        "title": first_tag_text("short_desc"),
        "reporter_email": first_tag_text("reporter"),
        "assigned_to_email": first_tag_text("assigned_to"),
        "cc_emails": [self._string_contents(cc) for cc in document.findAll('cc')],
        "attachments": [self._parse_attachment_element(element, bug_id) for element in document.findAll('attachment')],
        "platform": first_tag_text("rep_platform"),
        "os": first_tag_text("op_sys"),
        # The description is the <thetext> following the first <long_desc>.
        "long_description": self._string_contents(document.find("long_desc").findNext("thetext")),
        "keywords": first_tag_text("keywords"),
        "component": first_tag_text("component"),
    }
    return bug