class Rebaseliner(object):
    """Class to produce new baselines for a given platform.

    A Rebaseliner works out which tests are marked for rebaselining in the
    test expectations, downloads the latest layout test results archive
    from buildbot, extracts the "-actual" results from it and installs
    them as the new "-expected" baselines, adding them to the scm.
    """

    REVISION_REGEX = r"<a href=\"(\d+)/\">"

    def __init__(
        self, running_port, target_port, platform, options, url_fetcher, zip_factory, scm, logged_before=False
    ):
        """
        Args:
            running_port: the Port the script is running on.
            target_port: the Port the script uses to find port-specific
                configuration information like the test_expectations.txt
                file location and the list of test platforms.
            platform: the test platform to rebaseline
            options: the command-line options object.
            url_fetcher: object that can fetch objects from URLs
            zip_factory: optional object that can fetch zip files from URLs
            scm: scm object for adding new baselines
            logged_before: whether the previous running port logged anything.
        """
        self._platform = platform
        self._options = options
        self._port = running_port
        self._filesystem = running_port._filesystem
        self._target_port = target_port

        # FIXME: This should get its PortFactory from a Host object.
        # Note: using running_port.executive, running_port.user since we can't get them from a host.
        self._rebaseline_port = PortFactory().get(
            platform,
            options,
            filesystem=self._filesystem,
            executive=running_port.executive,
            user=running_port.user,
        )
        self._rebaselining_tests = set()
        self._rebaselined_tests = []
        self._logged_before = logged_before
        self.did_log = False

        # Create tests and expectations helper which is used to:
        #   -. compile list of tests that need rebaselining.
        #   -. update the tests in test_expectations file after rebaseline
        #      is done.
        expectations_str = self._rebaseline_port.test_expectations()
        self._test_expectations = test_expectations.TestExpectations(
            self._rebaseline_port, None, expectations_str, self._rebaseline_port.test_configuration(), False
        )
        self._url_fetcher = url_fetcher
        self._zip_factory = zip_factory
        self._scm = scm

    def run(self):
        """Run rebaseline process.

        Returns:
            True if there was nothing to rebaseline or every test marked for
            rebaselining was rebaselined; False otherwise.
        """
        log_dashed_string("Compiling rebaselining tests", self._platform, logging.DEBUG)
        if not self._compile_rebaselining_tests():
            return False
        if not self._rebaselining_tests:
            return True

        self.did_log = True
        log_dashed_string("Downloading archive", self._platform, logging.DEBUG)
        archive_file = self._download_buildbot_archive()
        _log.debug("")
        if not archive_file:
            _log.error("No archive found.")
            return False

        log_dashed_string("Extracting and adding new baselines", self._platform, logging.DEBUG)
        try:
            self._extract_and_add_new_baselines(archive_file)
        finally:
            # Release the archive even when extraction raises.
            archive_file.close()

        log_dashed_string("Updating rebaselined tests in file", self._platform)

        if len(self._rebaselining_tests) != len(self._rebaselined_tests):
            _log.debug("")
            _log.debug("NOT ALL TESTS WERE REBASELINED.")
            _log.debug(" Number marked for rebaselining: %d", len(self._rebaselining_tests))
            _log.debug(" Number actually rebaselined: %d", len(self._rebaselined_tests))
            _log.info("")
            return False

        _log.debug(" All tests needing rebaselining were successfully rebaselined.")
        _log.info("")
        return True

    def remove_rebaselining_expectations(self, tests, backup):
        """Rewrite the test expectations file without the rebaselined tests.

        Args:
            tests: the tests whose rebaselining expectations are removed.
            backup: if True, we backup the original test expectations file
                (to "<path>.orig.<timestamp>") before overwriting it.
        """
        new_expectations = self._test_expectations.remove_rebaselined_tests(tests)
        path = self._target_port.path_to_test_expectations_file()
        if backup:
            date_suffix = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
            backup_file = "%s.orig.%s" % (path, date_suffix)
            if self._filesystem.exists(backup_file):
                self._filesystem.remove(backup_file)
            _log.debug('Saving original file to "%s"', backup_file)
            self._filesystem.move(path, backup_file)

        self._filesystem.write_text_file(path, new_expectations)
        # self._scm.add(path)

    def get_rebaselined_tests(self):
        """Return the list of tests rebaselined so far."""
        return self._rebaselined_tests

    def _record_rebaselined(self, test):
        """Remember |test| as rebaselined, listing each test only once."""
        if test not in self._rebaselined_tests:
            self._rebaselined_tests.append(test)

    def _compile_rebaselining_tests(self):
        """Compile list of tests that need rebaselining for the platform.

        Returns:
            False if reftests are wrongly marked as 'needs rebaselining' or True
        """
        self._rebaselining_tests = self._test_expectations.get_rebaselining_failures()
        if not self._rebaselining_tests:
            _log.info("%s: No tests to rebaseline.", self._platform)
            return True

        fs = self._target_port._filesystem
        for test in self._rebaselining_tests:
            test_abspath = self._target_port.abspath_for_test(test)
            if fs.exists(self._target_port.reftest_expected_filename(test_abspath)) or fs.exists(
                self._target_port.reftest_expected_mismatch_filename(test_abspath)
            ):
                _log.error("%s seems to be a reftest. We can not rebase for reftests.", test)
                self._rebaselining_tests = set()
                return False

        if not self._logged_before:
            _log.info("")
        _log.info("%s: Rebaselining %d tests:", self._platform, len(self._rebaselining_tests))
        for test_no, test in enumerate(self._rebaselining_tests, 1):
            _log.debug(" %d: %s", test_no, test)

        return True

    def _get_latest_revision(self, url):
        """Get the latest layout test revision number from buildbot.

        Args:
            url: Url to retrieve layout test revision numbers.

        Returns:
            latest revision (as the string found in the listing) or None on
            failure.
        """
        _log.debug('Url to retrieve revision: "%s"', url)
        content = self._url_fetcher.fetch(url)
        revisions = re.findall(self.REVISION_REGEX, content)
        if not revisions:
            _log.error('Failed to find revision, content: "%s"', content)
            return None

        # Order numerically: a plain string sort would rank "99" above "100".
        revisions.sort(key=int)
        latest = revisions[-1]
        _log.debug(" Latest revision: %s", latest)
        return latest

    def _get_archive_dir_name(self, platform):
        """Get name of the layout test archive directory.

        Returns:
            Directory name or None on failure
        """
        if platform in ARCHIVE_DIR_NAME_DICT:
            return ARCHIVE_DIR_NAME_DICT[platform]

        _log.error("Cannot find platform key %s in archive " "directory name dictionary", platform)
        return None

    def _get_archive_url(self):
        """Generate the url to download latest layout test archive.

        Returns:
            Url to download archive or None on failure
        """
        if self._options.force_archive_url:
            return self._options.force_archive_url

        dir_name = self._get_archive_dir_name(self._platform)
        if not dir_name:
            return None

        _log.debug('Buildbot platform dir name: "%s"', dir_name)

        url_base = "%s/%s/" % (self._options.archive_url, dir_name)
        latest_revision = self._get_latest_revision(url_base)
        # The revision arrives as a string of digits; compare it numerically.
        # Comparing the raw string with 0 is always False on Python 2 and a
        # TypeError on Python 3.
        if latest_revision is None or int(latest_revision) <= 0:
            return None
        archive_url = "%s%s/layout-test-results.zip" % (url_base, latest_revision)
        _log.info(" Using %s", archive_url)
        return archive_url

    def _download_buildbot_archive(self):
        """Download layout test archive file from buildbot and return a handle to it.

        Returns:
            A ZipFileSet for the archive, or None if no archive url could be
            determined.
        """
        url = self._get_archive_url()
        if url is None:
            return None

        archive_file = zipfileset.ZipFileSet(url, filesystem=self._filesystem, zip_factory=self._zip_factory)
        _log.debug("Archive downloaded")
        return archive_file

    def _extract_and_add_new_baselines(self, zip_file):
        """Extract new baselines from the zip file and add them to SVN repository.

        Resets self._rebaselined_tests and then processes every test in
        self._rebaselining_tests; the outcome is available afterwards through
        get_rebaselined_tests(). Nothing is returned.
        """
        _log.debug("zip file namelist:")
        for name in zip_file.namelist():
            _log.debug(" %s", name)

        _log.debug('Platform dir: "%s"', self._platform)

        self._rebaselined_tests = []
        for test_no, test in enumerate(self._rebaselining_tests):
            _log.debug("Test %d: %s", test_no + 1, test)
            self._extract_and_add_new_baseline(test, zip_file)

    def _extract_and_add_new_baseline(self, test, zip_file):
        """Install the new baselines found in |zip_file| for a single test.

        For every suffix in BASELINE_SUFFIXES the "-actual" result is looked
        up in the archive; duplicates of a fallback baseline are dropped,
        everything else is moved into the baseline directory and added to
        the scm. A test that was handled successfully is recorded (once) in
        self._rebaselined_tests.
        """
        found = False
        scm_error = False
        test_basename = self._filesystem.splitext(test)[0]
        # Query the archive listing once instead of once per suffix.
        archive_names = set(zip_file.namelist())
        for suffix in BASELINE_SUFFIXES:
            archive_test_name = "layout-test-results/%s-actual%s" % (test_basename, suffix)
            _log.debug(' Archive test file name: "%s"', archive_test_name)
            if archive_test_name not in archive_names:
                _log.debug(" %s file not in archive.", suffix)
                continue

            found = True
            _log.debug(" %s file found in archive.", suffix)

            temp_name = self._extract_from_zip_to_tempfile(zip_file, archive_test_name)

            expected_filename = "%s-expected%s" % (test_basename, suffix)
            expected_fullpath = self._filesystem.join(self._rebaseline_port.baseline_path(), expected_filename)
            expected_fullpath = self._filesystem.normpath(expected_fullpath)
            _log.debug(' Expected file full path: "%s"', expected_fullpath)

            relpath = self._filesystem.relpath(expected_fullpath, self._target_port.layout_tests_dir())

            # TODO(victorw): for now, the rebaselining tool checks whether
            # or not THIS baseline is duplicate and should be skipped.
            # We could improve the tool to check all baselines in upper
            # and lower levels and remove all duplicated baselines.
            if self._is_dup_baseline(temp_name, expected_fullpath, test, suffix, self._platform):
                self._filesystem.remove(temp_name)
                if self._filesystem.exists(expected_fullpath):
                    _log.info(" Removing %s", relpath)
                    self._delete_baseline(expected_fullpath)
                _log.debug(" %s is a duplicate", relpath)

                # FIXME: We consider a duplicate baseline a success in the normal case.
                # FIXME: This may not be what you want sometimes; should this be
                # FIXME: controllable?
                self._record_rebaselined(test)
                continue

            if suffix == ".checksum" and self._png_has_same_checksum(temp_name, test, expected_fullpath):
                self._filesystem.remove(temp_name)
                # If an old checksum exists, delete it.
                self._delete_baseline(expected_fullpath)
                continue

            self._filesystem.maybe_make_directory(self._filesystem.dirname(expected_fullpath))
            self._filesystem.move(temp_name, expected_fullpath)

            path_from_base = self._filesystem.relpath(expected_fullpath)
            if self._scm.exists(path_from_base):
                _log.info(" Updating %s", relpath)
            else:
                _log.info(" Adding %s", relpath)

            if self._scm.add(expected_fullpath, return_exit_code=True):
                # FIXME: print detailed diagnose messages
                scm_error = True
            elif suffix != ".checksum":
                self._create_html_baseline_files(expected_fullpath)

        if not found:
            _log.warning("No results in archive for %s", test)
        elif scm_error:
            _log.warning("Failed to add baselines to your repository.")
        else:
            _log.debug(" Rebaseline succeeded.")
            # A test may already have been recorded through the duplicate
            # path above; run() compares list lengths, so record it once.
            self._record_rebaselined(test)

    def _extract_from_zip_to_tempfile(self, zip_file, filename):
        """Extracts |filename| from |zip_file|, a ZipFileSet.

        Returns:
            the full path name to the extracted file.
        """
        data = zip_file.read(filename)
        suffix = self._filesystem.splitext(filename)[1]
        temp_file, temp_name = self._filesystem.open_binary_tempfile(suffix)
        try:
            temp_file.write(data)
        finally:
            temp_file.close()
        return temp_name

    def _png_has_same_checksum(self, checksum_path, test, checksum_expected_fullpath):
        """Returns True if the fallback png for |checksum_expected_fullpath|
        contains the same checksum.

        Args:
            checksum_path: path of the file holding the new checksum text.
            test: test name, used to locate the fallback png.
            checksum_expected_fullpath: destination path of the checksum
                baseline; only used in the mismatch log message.
        """
        fs = self._filesystem
        png_fullpath = self._first_fallback_png_for_test(test)

        if not fs.exists(png_fullpath):
            _log.error(" Checksum without png file found! Expected %s to exist.", png_fullpath)
            return False

        with fs.open_binary_file_for_reading(png_fullpath) as filehandle:
            checksum_in_png = read_checksum_from_png.read_checksum(filehandle)
            checksum_in_text_file = fs.read_text_file(checksum_path)
            if checksum_in_png and checksum_in_png != checksum_in_text_file:
                _log.error(
                    " checksum in %s and %s don't match! Continuing" " to copy but please investigate.",
                    checksum_expected_fullpath,
                    png_fullpath,
                )
            return checksum_in_text_file == checksum_in_png

    def _first_fallback_png_for_test(self, test):
        """Return the path of the first expected ".png" baseline for |test|."""
        all_baselines = self._rebaseline_port.expected_baselines(test, ".png", True)
        first_dir, first_file = all_baselines[0][0], all_baselines[0][1]
        return self._filesystem.join(first_dir, first_file)

    def _is_dup_baseline(self, new_baseline, baseline_path, test, suffix, platform):
        """Check whether a baseline is duplicate and can fallback to same
        baseline for another platform. For example, if a test has same
        baseline on linux and windows, then we only store windows
        baseline and linux baseline will fallback to the windows version.

        Only the first usable fallback (one that has both a directory and a
        file name and is not |baseline_path| itself) is compared.

        Args:
            new_baseline: temp filename containing the new baseline results
            baseline_path: baseline expectation file name.
            test: test name.
            suffix: file suffix of the expected results, including dot;
                e.g. '.txt' or '.png'.
            platform: baseline platform 'mac', 'win' or 'linux'.

        Returns:
            True if the baseline is unnecessary.
            False otherwise.
        """
        all_baselines = self._rebaseline_port.expected_baselines(test, suffix, True)
        for fallback_dir, fallback_file in all_baselines:
            if not fallback_dir or not fallback_file:
                continue

            fallback_fullpath = self._filesystem.normpath(self._filesystem.join(fallback_dir, fallback_file))
            if fallback_fullpath.lower() == baseline_path.lower():
                # This is the baseline being replaced, not a fallback.
                continue

            fallback_dir_relpath = self._filesystem.relpath(fallback_dir, self._target_port.layout_tests_dir())
            if fallback_dir_relpath == "":
                fallback_dir_relpath = "<generic>"

            new_output = self._filesystem.read_binary_file(new_baseline)
            fallback_output = self._filesystem.read_binary_file(fallback_fullpath)
            is_image = baseline_path.lower().endswith(".png")
            if not self._diff_baselines(new_output, fallback_output, is_image):
                _log.info(" Skipping %s (matches %s)", test, fallback_dir_relpath)
                return True
            # The nearest fallback differs, so the new baseline is needed.
            return False

        return False

    def _diff_baselines(self, output1, output2, is_image):
        """Check whether two baselines are different.

        Args:
            output1, output2: contents of the baselines to compare.
            is_image: True to compare as images, False to compare as text.

        Returns:
            True if two files are different or have different extensions.
            False otherwise.
        """
        if is_image:
            return self._port.diff_image(output1, output2)[0]

        return self._port.compare_text(output1, output2)

    def _delete_baseline(self, filename):
        """Remove the file from repository and delete it from disk.

        Does nothing when |filename| is empty or is not an existing file.

        Args:
            filename: full path of the file to delete.
        """
        if not filename or not self._filesystem.isfile(filename):
            return
        self._scm.delete(filename)

    def _create_html_baseline_files(self, baseline_fullpath):
        """Create baseline files (old, new and diff) in html directory.

        The files are used to compare the rebaselining results.

        Args:
            baseline_fullpath: full path of the expected baseline file.
        """
        baseline_relpath = self._filesystem.relpath(baseline_fullpath)
        _log.debug(' Html: create baselines for "%s"', baseline_relpath)

        if not baseline_fullpath or not self._filesystem.exists(baseline_fullpath):
            _log.debug(' Html: Does not exist: "%s"', baseline_fullpath)
            return

        if not self._scm.exists(baseline_relpath):
            _log.debug(' Html: Does not exist in scm: "%s"', baseline_relpath)
            return

        baseline_filename = self._filesystem.basename(baseline_fullpath)

        def html_path(kind):
            # Location of the "new", "old" or "diff" file for this baseline.
            return get_result_file_fullpath(
                self._filesystem, self._options.html_directory, baseline_filename, self._platform, kind
            )

        # Copy the new baseline to html directory for result comparison.
        new_file = html_path("new")
        self._filesystem.copyfile(baseline_fullpath, new_file)
        _log.debug(' Html: copied new baseline file from "%s" to "%s".', baseline_fullpath, new_file)

        # Get the old baseline from the repository and save to the html directory.
        try:
            output = self._scm.show_head(baseline_relpath)
        except ScriptError as e:
            _log.warning(e)
            output = ""

        if (not output) or (output.upper().rstrip().endswith("NO SUCH FILE OR DIRECTORY")):
            _log.warning(' No base file: "%s"', baseline_fullpath)
            return

        base_file = html_path("old")
        if base_file.upper().endswith(".PNG"):
            self._filesystem.write_binary_file(base_file, output)
        else:
            self._filesystem.write_text_file(base_file, output)
        _log.debug(' Html: created old baseline file: "%s".', base_file)

        # Get the diff between old and new baselines and save to the html dir.
        diff_file = html_path("diff")
        has_diff = False
        if baseline_filename.upper().endswith(".TXT"):
            output = self._scm.diff_for_file(baseline_relpath, log=_log)
            if output:
                self._filesystem.write_text_file(diff_file, output)
                has_diff = True
        elif baseline_filename.upper().endswith(".PNG"):
            old_file = html_path("old")
            new_file = html_path("new")
            _log.debug(' Html: diffing "%s" and "%s"', old_file, new_file)
            old_output = self._filesystem.read_binary_file(old_file)
            new_output = self._filesystem.read_binary_file(new_file)
            image_diff = self._port.diff_image(old_output, new_output)[0]
            # NOTE(review): presumably diff_image yields a falsy first
            # element when the images match - confirm against the Port API.
            # Only write (and report) a diff file when there is diff data.
            if image_diff:
                self._filesystem.write_binary_file(diff_file, image_diff)
                has_diff = True

        if has_diff:
            _log.debug(' Html: created baseline diff file: "%s".', diff_file)