def test_convert_vendor_prefix_js_paths(self):
    """Check that a /common/vendor-prefix.js script path gets rewritten.

    Feeds a small document through ``_W3CTestConverter`` (with output
    captured), parses the second element of the converter output with
    BeautifulSoup, and asserts that no ``src`` matches the original path
    pattern and exactly one ``src`` contains the resources directory
    expressed relative to the fake test directory.
    """
    test_html = """<head>
<script src="/common/vendor-prefix.js">
</head>
"""
    fake_dir_path = self.fake_dir_path('adapterjspaths')
    converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)

    oc = OutputCapture()
    oc.capture_output()
    try:
        converter.feed(test_html)
        converter.close()
        converted = converter.output()
    finally:
        # Always restore the real output streams, even if conversion raises.
        oc.restore_output()

    new_html = BeautifulSoup(converted[1])

    # Verify the original paths are gone, and the new paths are present.
    # NOTE(review): this pattern starts with a literal double quote; if the
    # parser hands back attribute values without their quotes, this check can
    # never match and is vacuous -- confirm and tighten if so.
    orig_path_pattern = re.compile('"/common/vendor-prefix.js')
    self.assertEqual(len(new_html.findAll(src=orig_path_pattern)), 0,
                     'vendor-prefix.js path was not converted')

    resources_dir = converter.path_from_webkit_root("tests", "resources")
    new_relpath = os.path.relpath(resources_dir, fake_dir_path)
    # The relative path is data, not a regex: escape it so '.' (and '\' on
    # Windows) are matched literally.
    relpath_pattern = re.compile(re.escape(new_relpath))
    self.assertEqual(len(new_html.findAll(src=relpath_pattern)), 1,
                     'vendor-prefix.js relative path not correct')
def test_convert_vendor_prefix_js_paths(self):
    """Check that a /common/vendor-prefix.js script path gets rewritten.

    Runs ``_W3CTestConverter`` over a document that references
    ``/common/vendor-prefix.js`` and asserts, on the parsed converter
    output, that the original path pattern no longer matches any ``src``
    and that exactly one ``src`` contains the LayoutTests/resources
    directory expressed relative to the fake test directory.
    """
    test_html = """<head>
<script src="/common/vendor-prefix.js">
</head>
"""
    fake_dir_path = self.fake_dir_path('adapterjspaths')
    converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)

    oc = OutputCapture()
    oc.capture_output()
    try:
        converter.feed(test_html)
        converter.close()
        converted = converter.output()
    finally:
        # Always restore the real output streams, even if conversion raises.
        oc.restore_output()

    new_html = BeautifulSoup(converted[1])

    # Verify the original paths are gone, and the new paths are present.
    # NOTE(review): this pattern starts with a literal double quote; if the
    # parser hands back attribute values without their quotes, this check can
    # never match and is vacuous -- confirm and tighten if so.
    orig_path_pattern = re.compile('"/common/vendor-prefix.js')
    self.assertEqual(len(new_html.findAll(src=orig_path_pattern)), 0,
                     'vendor-prefix.js path was not converted')

    resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")
    new_relpath = os.path.relpath(resources_dir, fake_dir_path)
    # The relative path is data, not a regex: escape it so '.' (and '\' on
    # Windows) are matched literally.
    relpath_pattern = re.compile(re.escape(new_relpath))
    self.assertEqual(len(new_html.findAll(src=relpath_pattern)), 1,
                     'vendor-prefix.js relative path not correct')
def verify_test_harness_paths(self, converter, converted, test_path, num_src_paths, num_href_paths):
    """Assert that testharness paths in *converted* were rewritten.

    Args:
        converter: object providing ``path_from_webkit_root``.
        converted: converted markup, either as a string (parsed here with
            BeautifulSoup) or as an already parsed document.
        test_path: directory the rewritten paths should be relative to.
        num_src_paths: expected number of ``src`` attributes containing the
            relative resources path.
        num_href_paths: expected number of ``href`` attributes containing
            the relative resources path.
    """
    if isinstance(converted, basestring):
        converted = BeautifulSoup(converted)

    resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")

    # Verify the original paths are gone, and the new paths are present.
    # NOTE(review): this pattern starts with a literal double quote; if
    # attribute values are exposed without their quotes the two "gone"
    # checks below can never match -- confirm.
    orig_path_pattern = re.compile('"/resources/testharness')
    self.assertEqual(len(converted.findAll(src=orig_path_pattern)), 0,
                     'testharness src path was not converted')
    self.assertEqual(len(converted.findAll(href=orig_path_pattern)), 0,
                     'testharness href path was not converted')

    new_relpath = os.path.relpath(resources_dir, test_path)
    # Escape the path so regex metacharacters in it ('.', '\') match literally.
    relpath_pattern = re.compile(re.escape(new_relpath))
    self.assertEqual(len(converted.findAll(src=relpath_pattern)), num_src_paths,
                     'testharness src relative path not correct')
    self.assertEqual(len(converted.findAll(href=relpath_pattern)), num_href_paths,
                     'testharness href relative path not correct')
def _parse_bug_page(self, page):
    """Parse a bug page into a plain dictionary.

    The page is parsed with BeautifulSoup. The returned dict has the keys
    "id" (int), "title", "reporter_email", "assigned_to_email",
    "cc_emails" (one entry per <cc> element) and "attachments" (one entry
    per <attachment> element, built by ``_parse_attachment_element``).
    """
    soup = BeautifulSoup(page)

    bug_id = int(soup.find("bug_id").string)
    title = self._string_contents(soup.find("short_desc"))
    reporter_email = self._string_contents(soup.find("reporter"))
    assigned_to_email = self._string_contents(soup.find("assigned_to"))

    cc_emails = []
    for cc_element in soup.findAll('cc'):
        cc_emails.append(self._string_contents(cc_element))

    attachments = []
    for attachment_element in soup.findAll('attachment'):
        attachments.append(
            self._parse_attachment_element(attachment_element, bug_id))

    return {
        "id": bug_id,
        "title": title,
        "reporter_email": reporter_email,
        "assigned_to_email": assigned_to_email,
        "cc_emails": cc_emails,
        "attachments": attachments,
    }
def _parse_attachment_ids_request_query(self, page, since=None):
    """Extract attachment ids from review links in a request-query page.

    Args:
        page: markup of the page to scan.
        since: optional datetime. When given, only the table with class
            "requests" is parsed and rows whose date cell is earlier than
            *since* are skipped (and logged).

    Returns:
        A list of ints, one per matching ``attachment.cgi?id=N&action=review``
        link, in document order.
    """
    # Formats
    digits = re.compile(r"\d+")
    attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")

    # if no date is given, return all ids
    if not since:
        attachment_links = SoupStrainer("a", href=attachment_href)
        return [
            int(digits.search(tag["href"]).group(0))
            for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
        ]

    # Parse the main table only
    date_format = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
    mtab = SoupStrainer("table", {"class": "requests"})
    soup = BeautifulSoup(page, parseOnlyThese=mtab)

    patch_ids = []
    for row in soup.findAll("tr"):
        patch_tag = row.find("a", {"href": attachment_href})
        if not patch_tag:
            continue
        patch_id = int(digits.search(patch_tag["href"]).group(0))

        # Rows without a recognisable date are kept; only rows that are
        # provably older than `since` are dropped.
        date_tag = row.find("td", text=date_format)
        if date_tag:
            row_date = datetime.strptime(
                date_format.search(date_tag).group(0), "%Y-%m-%d %H:%M")
            if row_date < since:
                _log.info("Patch is old: %d (%s)", patch_id, date_tag)
                continue
        patch_ids.append(patch_id)
    return patch_ids
def _parse_attachment_ids_request_query(self, page, since=None):
    """Extract attachment ids from review links in a request-query page.

    Args:
        page: markup of the page to scan.
        since: optional datetime. When given, only the table with class
            "requests" is parsed and rows whose date cell is earlier than
            *since* are skipped.

    Returns:
        A list of ints, one per matching ``attachment.cgi?id=N&action=review``
        link, in document order.
    """
    # Formats
    digits = re.compile(r"\d+")
    attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")

    # if no date is given, return all ids
    if not since:
        attachment_links = SoupStrainer("a", href=attachment_href)
        return [
            int(digits.search(tag["href"]).group(0))
            for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
        ]

    # Parse the main table only
    date_format = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
    mtab = SoupStrainer("table", {"class": "requests"})
    soup = BeautifulSoup(page, parseOnlyThese=mtab)

    patch_ids = []
    for row in soup.findAll("tr"):
        patch_tag = row.find("a", {"href": attachment_href})
        if not patch_tag:
            continue
        patch_id = int(digits.search(patch_tag["href"]).group(0))

        # Rows without a recognisable date are kept; only rows that are
        # provably older than `since` are dropped.
        date_tag = row.find("td", text=date_format)
        if date_tag:
            row_date = datetime.strptime(
                date_format.search(date_tag).group(0), "%Y-%m-%d %H:%M")
            if row_date < since:
                continue
        patch_ids.append(patch_id)
    return patch_ids
def _parse_bug_page(self, page):
    """Parse a bug page into a plain dictionary.

    The title is converted with ``unicode`` and the e-mail fields with
    ``str``. The returned dict has the keys "id" (int), "title",
    "reporter_email", "assigned_to_email", "cc_emails" (one entry per <cc>
    element) and "attachments" (one entry per <attachment> element, built
    by ``_parse_attachment_element``).
    """
    soup = BeautifulSoup(page)

    bug_id = int(soup.find("bug_id").string)
    title = unicode(soup.find("short_desc").string)
    reporter_email = str(soup.find("reporter").string)
    assigned_to_email = str(soup.find("assigned_to").string)

    cc_emails = []
    for cc_element in soup.findAll('cc'):
        cc_emails.append(str(cc_element.string))

    attachments = []
    for attachment_element in soup.findAll('attachment'):
        attachments.append(
            self._parse_attachment_element(attachment_element, bug_id))

    return {
        "id": bug_id,
        "title": title,
        "reporter_email": reporter_email,
        "assigned_to_email": assigned_to_email,
        "cc_emails": cc_emails,
        "attachments": attachments,
    }
def _parse_bug_page(self, page):
    """Parse a bug page into a plain dictionary.

    Besides id, title, reporter/assignee e-mails, CC list and attachments,
    the result carries "platform", "os", "long_description" (the first
    <thetext> following the first <long_desc>), "keywords" and
    "component".
    """
    soup = BeautifulSoup(page)

    bug_id = int(soup.find("bug_id").string)
    title = self._string_contents(soup.find("short_desc"))
    reporter_email = self._string_contents(soup.find("reporter"))
    assigned_to_email = self._string_contents(soup.find("assigned_to"))

    cc_emails = []
    for cc_element in soup.findAll('cc'):
        cc_emails.append(self._string_contents(cc_element))

    attachments = []
    for attachment_element in soup.findAll('attachment'):
        attachments.append(
            self._parse_attachment_element(attachment_element, bug_id))

    platform = self._string_contents(soup.find("rep_platform"))
    operating_system = self._string_contents(soup.find("op_sys"))
    first_comment = soup.find("long_desc").findNext("thetext")
    long_description = self._string_contents(first_comment)
    keywords = self._string_contents(soup.find("keywords"))
    component = self._string_contents(soup.find("component"))

    return {
        "id": bug_id,
        "title": title,
        "reporter_email": reporter_email,
        "assigned_to_email": assigned_to_email,
        "cc_emails": cc_emails,
        "attachments": attachments,
        "platform": platform,
        "os": operating_system,
        "long_description": long_description,
        "keywords": keywords,
        "component": component,
    }
def _parse_bug_dictionary_from_xml(self, page):
    """Build a bug dictionary from a bug's XML page.

    The result always has "id" (int), "title", "bug_status",
    "reporter_email", "assigned_to_email", "cc_emails" and "attachments";
    "dup_id" is added only when the document has a <dup_id> element.
    """
    soup = BeautifulSoup(page)

    bug = {"id": int(soup.find("bug_id").string)}
    bug["title"] = self._string_contents(soup.find("short_desc"))
    bug["bug_status"] = self._string_contents(soup.find("bug_status"))

    dup_id_element = soup.find("dup_id")
    if dup_id_element:
        bug["dup_id"] = self._string_contents(dup_id_element)

    bug["reporter_email"] = self._string_contents(soup.find("reporter"))
    bug["assigned_to_email"] = self._string_contents(soup.find("assigned_to"))

    cc_emails = []
    for cc_element in soup.findAll('cc'):
        cc_emails.append(self._string_contents(cc_element))
    bug["cc_emails"] = cc_emails

    attachments = []
    for attachment_element in soup.findAll('attachment'):
        attachments.append(
            self._parse_attachment_element(attachment_element, bug["id"]))
    bug["attachments"] = attachments

    return bug
def _parse_bug_page(self, page):
    """Turn a bug page into a dictionary of its fields.

    Simple text fields are read from the first element with the matching
    tag name; "cc_emails" and "attachments" collect every <cc> and
    <attachment> element; "long_description" is the first <thetext> that
    follows the first <long_desc>.
    """
    soup = BeautifulSoup(page)
    bug = {"id": int(soup.find("bug_id").string)}

    # (result key, source tag) pairs, processed in the order the keys are set.
    for key, tag_name in (("title", "short_desc"),
                          ("reporter_email", "reporter"),
                          ("assigned_to_email", "assigned_to")):
        bug[key] = self._string_contents(soup.find(tag_name))

    cc_emails = []
    for cc_element in soup.findAll('cc'):
        cc_emails.append(self._string_contents(cc_element))
    bug["cc_emails"] = cc_emails

    attachments = []
    for attachment_element in soup.findAll('attachment'):
        attachments.append(
            self._parse_attachment_element(attachment_element, bug["id"]))
    bug["attachments"] = attachments

    for key, tag_name in (("platform", "rep_platform"), ("os", "op_sys")):
        bug[key] = self._string_contents(soup.find(tag_name))

    first_comment = soup.find("long_desc").findNext("thetext")
    bug["long_description"] = self._string_contents(first_comment)

    for key in ("keywords", "component"):
        bug[key] = self._string_contents(soup.find(key))

    return bug
class TestParser(object):
    """Parses one test file and reports what kind of test it is.

    The file is parsed with BeautifulSoup when the object is constructed.
    ``analyze_test`` classifies the document and ``support_files`` lists the
    scheme-less paths a document refers to.
    """

    def __init__(self, filename, host):
        """Store *filename* and *host* and parse *filename* as the test document."""
        self.filename = filename
        self.host = host
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse *filename* into ``ref_doc`` (if *is_ref*) or ``test_doc``.

        The document is set to None when the path is not a regular file or
        when reading/parsing fails; failures are logged, not raised.
        """
        if self.filesystem.isfile(filename):
            try:
                doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
            except IOError:
                _log.error("IOError: Failed to read %s", filename)
                doc = None
            except HTMLParser.HTMLParseError:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("HTMLParseError: Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires.

        Returns: A dict which can have the properties:
            "test": test file name.
            "reference": related reference test file name if this is a reference test.
            "reference_support_info": extra information about the related reference test and any support files.
            "jstest": A boolean, whether this is a JS test.
            If the path doesn't look a test or the given contents are empty,
            then None is returned.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = BeautifulSoup(test_contents)

        if ref_contents is not None:
            self.ref_doc = BeautifulSoup(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                # Report the offending test file (not the filesystem object).
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files.
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if reference_support_files:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files,
                    }

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}

        elif 'csswg-test' in self.filename:
            # In csswg-test, all other files should be manual tests.
            # This function isn't called for non-test files in support/.
            test_info = {'test': self.filename}

        elif '-manual.' in self.filesystem.basename(self.filename):
            # WPT has a naming convention for manual tests.
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the test document's elements whose ``rel`` is *reftest_type*."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """Searches the file for all paths specified in url()s, href or src attributes.

        Returns the paths (src first, then href, then url() values) that do
        not start with a URI scheme such as ``http:`` or ``mailto:``. An
        empty list is returned when *doc* is None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for text in doc.findAll(text=url_pattern):
            # A single text node (e.g. a <style> body) can hold several
            # url() references; collect every one, not just the first.
            for url in url_pattern.findall(text):
                url = re.sub(r'url\([\'\"]?', '', url)
                url = re.sub(r'[\'\"]?\)', '', url)
                urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        # Anything that begins with a URI scheme (this covers http: and
        # mailto: too) is not a local support file.
        uri_scheme_pattern = re.compile(r'[A-Za-z][A-Za-z+.-]*:')
        for path in src_paths + href_paths + urls:
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
class TestParser(object):
    """Parses one test file and reports what kind of test it is.

    ``analyze_test`` classifies the document as a reftest, a JS test or (when
    the "all" option is set) a plain test; ``support_files`` lists the
    scheme-less paths a document refers to.
    """

    def __init__(self, options, filename):
        """Store *options* and *filename* and parse *filename* as the test document."""
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse *filename* into ``ref_doc`` (if *is_ref*) or ``test_doc``.

        The document is None when *filename* does not exist. Loading a test
        document (the default) also resets ``ref_doc`` to None, as before.
        """
        if self.filesystem.exists(filename):
            doc = Parser(self.filesystem.read_text_file(filename))
        else:
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc
            self.ref_doc = None

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # Single parenthesised argument: prints the same text under
                # both the Python 2 print statement and the print function.
                print('Warning: Webkit does not support multiple references. Importing the first ref defined in ' + self.filesystem.basename(self.filename))

            ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            if self.ref_doc is None:
                # Load into ref_doc; load_file returns nothing, so assigning
                # its result would discard the reference and clobber test_doc.
                self.load_file(ref_file, is_ref=True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]['href'].startswith('..'):
                support_files = self.support_files(self.ref_doc)
                test_info['refsupport'] = support_files

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and '-ref' not in self.filename and 'reference' not in self.filename:
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the test document's elements whose ``rel`` is *reftest_type*."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes.

        Returns the paths (src first, then href, then url() values) that do
        not start with a URI scheme such as ``http:`` or ``mailto:``. An
        empty list is returned when *doc* is None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for text in doc.findAll(text=url_pattern):
            # A single text node can hold several url() references, and the
            # quotes around the value are optional in CSS.
            for url in url_pattern.findall(text):
                url = re.sub(r'url\([\'\"]?', '', url)
                url = re.sub(r'[\'\"]?\)', '', url)
                urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        # Anything that begins with a URI scheme (http:, https:, mailto:,
        # data:, ...) is not a local support file.
        uri_scheme_pattern = re.compile(r'[A-Za-z][A-Za-z+.-]*:')
        for path in src_paths + href_paths + urls:
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
class TestParser(object):
    """Parses one test file and reports what kind of test it is.

    ``analyze_test`` classifies the document (reftest, WPT reftest by
    naming, manual test, JS test, reference file, or plain test when the
    "all" option is set) and flags slow tests; ``support_files`` lists the
    scheme-less paths a document refers to.
    """

    def __init__(self, options, filename, host=None, source_root_directory=None):
        """Store the arguments and parse *filename* as the test document.

        Args:
            options: mapping; ``options['all']`` is consulted by analyze_test.
            filename: path of the test file.
            host: object exposing ``filesystem``. When omitted a ``Host()``
                is created here, rather than once when the class is defined.
            source_root_directory: directory that reference hrefs starting
                with '/' are joined onto.
        """
        self.options = options
        self.filename = filename
        self.host = host if host is not None else Host()
        self.filesystem = self.host.filesystem
        self.source_root_directory = source_root_directory

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse *filename* into ``ref_doc`` (if *is_ref*) or ``test_doc``.

        The document is set to None when the path is not a regular file or
        when reading/parsing fails; failures are logged, not raised.
        """
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except Exception:
                # Best effort: log and carry on, but let KeyboardInterrupt
                # and SystemExit propagate.
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                href_match_file = matches[0]['href'].strip()
                if href_match_file.startswith('/'):
                    # NOTE(review): source_root_directory defaults to None;
                    # an absolute href then depends on how filesystem.join
                    # treats None -- confirm callers always supply it.
                    ref_file = self.filesystem.join(self.source_root_directory, href_match_file.lstrip('/'))
                else:
                    ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), href_match_file)
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                # Report the offending test file (not the filesystem object).
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            if ref_file == self.filename:
                return {'referencefile': self.filename}

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if reference_support_files:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files,
                    }

        # not all reference tests have a <link rel='match'> element in WPT repo
        elif self.is_wpt_reftest():
            test_info = {'test': self.filename, 'reference': self.potential_ref_filename()}
            test_info['reference_support_info'] = {}
        # we check for wpt manual test before checking for jstest, as some WPT manual tests can be classified as CSS JS tests
        elif self.is_wpt_manualtest():
            test_info = {'test': self.filename, 'manualtest': True}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif '-ref' in self.filename or 'reference' in self.filename:
            test_info = {'referencefile': self.filename}
        elif self.options['all'] is True:
            test_info = {'test': self.filename}

        if test_info and self.is_slow_test():
            test_info['slow'] = True

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the test document's elements whose ``rel`` is *reftest_type*."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def is_wpt_manualtest(self):
        """Returns whether the test is a manual test according WPT rules."""
        # General rule for manual test i.e. file ends with -manual.htm path
        # See https://web-platform-tests.org/writing-tests/manual.html#requirements-for-a-manual-test
        if '-manual.' in self.filename:
            return True

        # Rule specific to CSS WG manual tests i.e. rely on <meta name="flags">
        # See https://web-platform-tests.org/writing-tests/css-metadata.html#requirement-flags
        # For further details and discussions, see the following links:
        # https://github.com/web-platform-tests/wpt/issues/5381
        # https://github.com/web-platform-tests/wpt/issues/5293
        manual_flags = {"animated", "font", "history", "interact", "paged", "speech", "userstyle"}
        for match in self.test_doc.findAll(name='meta', attrs={'name': 'flags', 'content': True}):
            css_flags = set(match['content'].split())
            if css_flags & manual_flags:
                return True

        return False

    def is_slow_test(self):
        """Return True if a <meta name="timeout"> element has content "long"."""
        # .get() rather than ['name']: a <meta content="long"> without a
        # name attribute must not raise KeyError.
        return any(match.name == 'meta' and match.get('name') == 'timeout'
                   for match in self.test_doc.findAll(content='long'))

    def potential_ref_filename(self):
        """Return the test file name with '-ref' inserted before the extension."""
        parts = self.filesystem.splitext(self.filename)
        return parts[0] + '-ref' + parts[1]

    def is_wpt_reftest(self):
        """Returns whether the test is a ref test according WPT rules (i.e. file has a -ref.html counterpart)."""
        return self.filesystem.isfile(self.potential_ref_filename())

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes.

        Returns the paths (src first, then href, then url() values) that do
        not start with a URI scheme. An empty list is returned when *doc* is
        None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for text in doc.findAll(text=url_pattern):
            for url in url_pattern.findall(text):
                url = re.sub(r'url\([\'\"]?', '', url)
                url = re.sub(r'[\'\"]?\)', '', url)
                urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        # Compiled once, outside the loop.
        uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
        for path in src_paths + href_paths + urls:
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
class TestParser(object):
    """Parses one test file and reports what kind of test it is.

    ``analyze_test`` classifies the document as a reftest, a JS test or (when
    the "all" option is set) a plain test; ``support_files`` lists the
    scheme-less paths a document refers to.
    """

    def __init__(self, options, filename):
        """Store *options* and *filename* and parse *filename* as the test document."""
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse *filename* into ``ref_doc`` (if *is_ref*) or ``test_doc``.

        The document is set to None when the path is not a regular file or
        when reading/parsing fails; failures are logged, not raised.
        """
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except Exception:
                # Best effort: log and carry on, but let KeyboardInterrupt
                # and SystemExit propagate.
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                # Report the offending test file (not the filesystem object).
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if reference_support_files:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files,
                    }

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and '-ref' not in self.filename and 'reference' not in self.filename:
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the test document's elements whose ``rel`` is *reftest_type*."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes.

        Returns the paths (src first, then href, then url() values) that do
        not start with a URI scheme such as ``http:`` or ``mailto:``. An
        empty list is returned when *doc* is None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for text in doc.findAll(text=url_pattern):
            # A single text node (e.g. a <style> body) can hold several
            # url() references; collect every one, not just the first.
            for url in url_pattern.findall(text):
                url = re.sub(r'url\([\'\"]?', '', url)
                url = re.sub(r'[\'\"]?\)', '', url)
                urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        # Anything that begins with a URI scheme (this covers http: and
        # mailto: too) is not a local support file.
        uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
        for path in src_paths + href_paths + urls:
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
class TestParser(object):
    """Parses one test file and reports what kind of test it is.

    ``analyze_test`` classifies the document as a reftest, a JS test or (when
    the "all" option is set) a plain test; ``support_files`` lists the
    scheme-less paths a document refers to.
    """

    def __init__(self, options, filename):
        """Store *options* and *filename* and parse *filename* as the test document."""
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse *filename* into ``ref_doc`` (if *is_ref*) or ``test_doc``.

        The document is set to None when the path is not a regular file or
        when reading/parsing fails; failures are logged, not raised. Loading
        a test document (the default) also resets ``ref_doc`` to None, as
        before.
        """
        doc = None
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except Exception:
                # Best effort: log and leave the document as None (the old
                # code compared with `is None` here instead of assigning).
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
        elif self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc
            self.ref_doc = None

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type("match") + self.reference_links_of_type("mismatch")
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    "Multiple references are not supported. Importing the first ref defined in %s",
                    self.filesystem.basename(self.filename),
                )

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]["href"])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                # Report the offending test file (not the filesystem object).
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            if self.ref_doc is None:
                # Load into ref_doc; load_file returns nothing, so assigning
                # its result would discard the reference and clobber test_doc.
                self.load_file(ref_file, is_ref=True)

            test_info = {"test": self.filename, "reference": ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]["href"].startswith(".."):
                support_files = self.support_files(self.ref_doc)
                test_info["refsupport"] = support_files

        elif self.is_jstest():
            test_info = {"test": self.filename, "jstest": True}
        elif self.options["all"] is True and "-ref" not in self.filename and "reference" not in self.filename:
            test_info = {"test": self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the test document's elements whose ``rel`` is *reftest_type*."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile("['\"/]?/resources/testharness")))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes.

        Returns the paths (src first, then href, then url() values) that do
        not start with a URI scheme such as ``http:`` or ``mailto:``. An
        empty list is returned when *doc* is None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile(".*"))
        elements_with_href_attributes = doc.findAll(href=re.compile(".*"))

        url_pattern = re.compile(r"url\(.*\)")
        urls = []
        for text in doc.findAll(text=url_pattern):
            # A single text node (e.g. a <style> body) can hold several
            # url() references; collect every one, not just the first.
            for url in url_pattern.findall(text):
                url = re.sub(r"url\(['\"]?", "", url)
                url = re.sub(r"['\"]?\)", "", url)
                urls.append(url)

        src_paths = [src_tag["src"] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag["href"] for href_tag in elements_with_href_attributes]

        # Anything that begins with a URI scheme (http:, https:, mailto:,
        # data:, ...) is not a local support file.
        uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
        for path in src_paths + href_paths + urls:
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
class TestParser(object):
    """Inspects one imported test file and works out what kind of test it is.

    The file named by ``filename`` is parsed on construction. ``analyze_test``
    then reports whether it is a reftest, a JS (testharness) test or a plain
    test, together with the reference and support files it needs.
    """

    def __init__(self, options, filename):
        """Remember the options and parse ``filename`` right away.

        Args:
            options: Mapping of importer options; this class only reads
                ``options['all']``.
            filename: Path of the test file to analyze.
        """
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse ``filename`` and store the resulting document on this parser.

        Args:
            filename: Path of the file to parse.
            is_ref: When true the document is stored as ``self.ref_doc``,
                otherwise as ``self.test_doc``. Keeping the two apart stops a
                reference file from overwriting the test document.

        The stored document is None when ``filename`` is not a regular file or
        when it cannot be parsed.
        """
        doc = None
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except Exception:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        elif self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and
        what reference or support files it requires.

        Args:
            test_contents: Optional markup that replaces the parsed test file.
            ref_contents: Optional markup that replaces the parsed reference.

        Returns:
            None when there is nothing to analyze, when the file is not
            recognised as a test, or when a reference link has no "href".
            Otherwise a dict with "test" plus, depending on the kind of test,
            "reference", "refsupport" or "jstest".
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            if self.ref_doc is None:
                # Load into ref_doc only; test_doc must keep describing the test.
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]['href'].startswith('..'):
                test_info['refsupport'] = self.support_files(self.ref_doc)

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif (self.options['all'] is True
              and '-ref' not in self.filename
              and 'reference' not in self.filename):
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the elements of the test document whose rel is ``reftest_type``."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """Searches the file for all paths specified in url()'s, href or src attributes.

        Args:
            doc: A parsed document, or None.

        Returns:
            A list of the paths found (src first, then href, then url()),
            leaving out those that start with "http:" or "mailto:". The list
            is empty when ``doc`` is None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for text in doc.findAll(text=url_pattern):
            # Strip the url( ... ) wrapper and optional quotes, keeping the path.
            url = re.search(url_pattern, text)
            url = re.sub(r'url\([\'\"]?', '', url.group(0))
            url = re.sub(r'[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        for path in src_paths + href_paths + urls:
            if not path.startswith(('http:', 'mailto:')):
                support_files.append(path)

        return support_files
class TestParser(object):
    """Inspects one imported test file and works out what kind of test it is.

    The file named by ``filename`` is parsed on construction. ``analyze_test``
    then reports whether it is a reftest, a JS (testharness) test or a manual
    test, together with the reference and support files it needs.
    """

    def __init__(self, filename, host):
        """Remember the host and parse ``filename`` right away.

        Args:
            filename: Path of the test file to analyze.
            host: Object whose ``filesystem`` attribute is used for all file
                access.
        """
        self.filename = filename
        self.host = host
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse ``filename`` and store the resulting document on this parser.

        Args:
            filename: Path of the file to parse.
            is_ref: When true the document is stored as ``self.ref_doc``,
                otherwise as ``self.test_doc``.

        The stored document is None when ``filename`` is not a regular file or
        when reading or parsing it fails.
        """
        doc = None
        if self.filesystem.isfile(filename):
            try:
                doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
            except IOError:
                _log.error("IOError: Failed to read %s", filename)
                doc = None
            except HTMLParser.HTMLParseError:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("HTMLParseError: Failed to parse %s", filename)
                doc = None
        elif self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and
        what reference or support files it requires.

        Returns:
            A dict which can have the properties:
                "test": test file name.
                "reference": related reference test file name if this is a reference test.
                "reference_support_info": extra information about the related reference test and any support files.
                "jstest": A boolean, whether this is a JS test.
            If the path doesn't look a test or the given contents are empty,
            then None is returned.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = BeautifulSoup(test_contents)

        if ref_contents is not None:
            self.ref_doc = BeautifulSoup(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            test_dir = self.filesystem.dirname(self.filename)
            try:
                ref_file = self.filesystem.join(test_dir, matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files.
            test_info['reference_support_info'] = {}
            ref_dir = self.filesystem.dirname(ref_file)
            if ref_dir != test_dir:
                reference_support_files = self.support_files(self.ref_doc)
                if reference_support_files:
                    reference_relpath = self.filesystem.relpath(test_dir, ref_dir) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files,
                    }

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}

        elif 'csswg-test' in self.filename:
            # In csswg-test, all other files should be manual tests.
            # This function isn't called for non-test files in support/.
            test_info = {'test': self.filename}

        elif '-manual.' in self.filesystem.basename(self.filename):
            # WPT has a naming convention for manual tests.
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the elements of the test document whose rel is ``reftest_type``."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """Searches the file for all paths specified in url()s, href or src attributes.

        Args:
            doc: A parsed document, or None.

        Returns:
            A list of the paths found (src first, then href, then url()),
            leaving out anything that starts with a URI scheme such as
            "http:" or "mailto:". The list is empty when ``doc`` is None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for text in doc.findAll(text=url_pattern):
            # Strip the url( ... ) wrapper and optional quotes, keeping the path.
            url = re.search(url_pattern, text)
            url = re.sub(r'url\([\'\"]?', '', url.group(0))
            url = re.sub(r'[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        # "http:" and "mailto:" are themselves URI schemes, so the single
        # scheme check below covers them; compile it once, outside the loop.
        uri_scheme_pattern = re.compile(r'[A-Za-z][A-Za-z+.-]*:')
        for path in src_paths + href_paths + urls:
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
class TestParser(object):
    """Inspects one imported test file and works out what kind of test it is.

    The file named by ``filename`` is parsed on construction. ``analyze_test``
    then reports whether it is a reftest, a manual test, a JS (testharness)
    test, a reference file or a plain test, together with the reference and
    support files it needs.
    """

    def __init__(self, options, filename, host=None):
        """Remember the options and parse ``filename`` right away.

        Args:
            options: Mapping of importer options; this class only reads
                ``options['all']``.
            filename: Path of the test file to analyze.
            host: Object whose ``filesystem`` attribute is used for all file
                access. A new ``Host()`` is created when omitted, so that no
                instance is built at class-definition time and shared between
                parsers.
        """
        self.options = options
        self.filename = filename
        self.host = host if host is not None else Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse ``filename`` and store the resulting document on this parser.

        Args:
            filename: Path of the file to parse.
            is_ref: When true the document is stored as ``self.ref_doc``,
                otherwise as ``self.test_doc``.

        The stored document is None when ``filename`` is not a regular file or
        when it cannot be parsed.
        """
        doc = None
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except Exception:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        elif self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and
        what reference or support files it requires.

        Args:
            test_contents: Optional markup that replaces the parsed test file.
            ref_contents: Optional markup that replaces the parsed reference.

        Returns:
            None when there is nothing to analyze, when the file is not
            recognised, or when a reference link has no "href". Otherwise a
            dict holding either "referencefile" or "test", the latter
            optionally with "reference", "reference_support_info",
            "manualtest" or "jstest". "slow" is added when the document asks
            for a long timeout.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            test_dir = self.filesystem.dirname(self.filename)
            try:
                ref_file = self.filesystem.join(test_dir, matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            # A file that names itself as its reference is reported as a reference file.
            if ref_file == self.filename:
                return {'referencefile': self.filename}

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            ref_dir = self.filesystem.dirname(ref_file)
            if ref_dir != test_dir:
                reference_support_files = self.support_files(self.ref_doc)
                if reference_support_files:
                    reference_relpath = self.filesystem.relpath(test_dir, ref_dir) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files,
                    }

        # not all reference tests have a <link rel='match'> element in WPT repo
        elif self.is_wpt_reftest():
            test_info = {'test': self.filename, 'reference': self.potential_ref_filename()}
            test_info['reference_support_info'] = {}
        # we check for wpt manual test before checking for jstest, as some WPT manual tests can be classified as CSS JS tests
        elif self.is_wpt_manualtest():
            test_info = {'test': self.filename, 'manualtest': True}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif '-ref' in self.filename or 'reference' in self.filename:
            test_info = {'referencefile': self.filename}
        elif self.options['all'] is True:
            test_info = {'test': self.filename}

        if test_info and self.is_slow_test():
            test_info['slow'] = True

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the elements of the test document whose rel is ``reftest_type``."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def is_wpt_manualtest(self):
        """Returns whether the test is a manual test according WPT rules (i.e. file ends with -manual.htm path)."""
        return self.filename.endswith(('-manual.htm', '-manual.html'))

    def is_slow_test(self):
        """Returns whether the test document has a <meta name="timeout" content="long"> element."""
        # .get() rather than ['name']: an element with content="long" is not
        # guaranteed to carry a name attribute at all.
        return any(match.name == 'meta' and match.get('name') == 'timeout'
                   for match in self.test_doc.findAll(content='long'))

    def potential_ref_filename(self):
        """Returns this test's file name with "-ref" inserted before the extension."""
        root, extension = self.filesystem.splitext(self.filename)
        return root + '-ref' + extension

    def is_wpt_reftest(self):
        """Returns whether the test is a ref test according WPT rules (i.e. file has a -ref.html counterpart)."""
        return self.filesystem.isfile(self.potential_ref_filename())

    def support_files(self, doc):
        """Searches the file for all paths specified in url()'s, href or src attributes.

        Args:
            doc: A parsed document, or None.

        Returns:
            A list of the paths found (src first, then href, then url()),
            leaving out anything that starts with a URI scheme. The list is
            empty when ``doc`` is None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for text in doc.findAll(text=url_pattern):
            # Strip the url( ... ) wrapper and optional quotes, keeping the path.
            url = re.search(url_pattern, text)
            url = re.sub(r'url\([\'\"]?', '', url.group(0))
            url = re.sub(r'[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        # Compiled once, outside the loop.
        uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
        for path in src_paths + href_paths + urls:
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
class TestParser(object):
    """Inspects one imported test file and works out what kind of test it is.

    The file named by ``filename`` is parsed on construction. ``analyze_test``
    then reports whether it is a reftest, a JS (testharness) test or a plain
    test, together with the reference and support files it needs.
    """

    def __init__(self, options, filename):
        """Remember the options and parse ``filename`` right away.

        Args:
            options: Mapping of importer options; this class only reads
                ``options['all']``.
            filename: Path of the test file to analyze.
        """
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse ``filename`` and store the resulting document on this parser.

        Args:
            filename: Path of the file to parse.
            is_ref: When true the document is stored as ``self.ref_doc``,
                otherwise as ``self.test_doc``.

        The stored document is None when ``filename`` is not a regular file or
        when it cannot be parsed.
        """
        doc = None
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except Exception:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        elif self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and
        what reference or support files it requires.

        Args:
            test_contents: Optional markup that replaces the parsed test file.
            ref_contents: Optional markup that replaces the parsed reference.

        Returns:
            None when there is nothing to analyze, when the file is not
            recognised as a test, or when a reference link has no "href".
            Otherwise a dict with "test" plus, depending on the kind of test,
            "reference" and "reference_support_info", or "jstest".
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            test_dir = self.filesystem.dirname(self.filename)
            try:
                ref_file = self.filesystem.join(test_dir, matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            ref_dir = self.filesystem.dirname(ref_file)
            if ref_dir != test_dir:
                reference_support_files = self.support_files(self.ref_doc)
                if reference_support_files:
                    reference_relpath = self.filesystem.relpath(test_dir, ref_dir) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files,
                    }

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif (self.options['all'] is True
              and '-ref' not in self.filename
              and 'reference' not in self.filename):
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the elements of the test document whose rel is ``reftest_type``."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """Searches the file for all paths specified in url()'s, href or src attributes.

        Args:
            doc: A parsed document, or None.

        Returns:
            A list of the paths found (src first, then href, then url()),
            leaving out anything that starts with a URI scheme such as
            "http:" or "mailto:". The list is empty when ``doc`` is None.
        """
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for text in doc.findAll(text=url_pattern):
            # Strip the url( ... ) wrapper and optional quotes, keeping the path.
            url = re.search(url_pattern, text)
            url = re.sub(r'url\([\'\"]?', '', url.group(0))
            url = re.sub(r'[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        # "http:" and "mailto:" are themselves URI schemes, so the single
        # scheme check below covers them; compile it once, outside the loop.
        uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
        for path in src_paths + href_paths + urls:
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files