def BuildPatchedWpr():
  shutil.copyfile(
      self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
  wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)

  # Save the original response headers.
  original_response_headers = {
      e.url: e.GetResponseHeadersDict() for e in wpr_archive.ListUrlEntries()}
  logging.info('saved response headers for %d resources',
               len(original_response_headers))
  if not original_response_headers:
    # TODO(gabadie): How is it possible to not even have the main resource in
    # the WPR archive? An example URL can be found in:
    # http://crbug.com/623966#c5
    raise Exception(
        'Looks like no resources were recorded in WPR during: {}'.format(
            self._common_builder.original_wpr_task.name))
  with open(self._original_headers_path, 'w') as file_output:
    json.dump(original_response_headers, file_output)

  # Patch WPR.
  wpr_url_entries = wpr_archive.ListUrlEntries()
  for wpr_url_entry in wpr_url_entries:
    sandwich_utils.PatchWprEntryToBeCached(wpr_url_entry)
  logging.info('number of patched entries: %d', len(wpr_url_entries))
  wpr_archive.Persist()
def PatchWpr(wpr_archive_path):
  """Patches a WPR archive to get all resources into the HTTP cache and avoid
  invalidations and revalidations.

  Args:
    wpr_archive_path: Path of the WPR archive to patch.
  """
  # Set the resources' cache max-age to 10 years.
  MAX_AGE = 10 * 365 * 24 * 60 * 60
  CACHE_CONTROL = 'public, max-age={}'.format(MAX_AGE)

  wpr_archive = wpr_backend.WprArchiveBackend(wpr_archive_path)
  for url_entry in wpr_archive.ListUrlEntries():
    response_headers = url_entry.GetResponseHeadersDict()
    if ('cache-control' in response_headers and
        response_headers['cache-control'] == CACHE_CONTROL):
      continue
    logging.info('patching %s', url_entry.url)

    # TODO(gabadie): May need to patch Last-Modified and If-Modified-Since.
    # TODO(gabadie): May need to delete ETag.
    # TODO(gabadie): May need to patch Vary.
    # TODO(gabadie): May need to take care of x-cache.
    #
    # Override the cache-control header to set the resource's max-age to
    # MAX_AGE.
    #
    # Important note: some resources holding sensitive information might have
    # cache-control set to no-store, which allows the resource to be cached
    # in memory but not in the file system. NoState-Prefetch takes care of
    # this case, but here, to simulate NoState-Prefetch, we have no choice
    # but to save absolutely all cached resources on disk so they survive
    # Chrome being killed between cache save, modification and push.
    url_entry.SetResponseHeader('cache-control', CACHE_CONTROL)
  wpr_archive.Persist()
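# A minimal usage sketch of PatchWpr() above. The archive path is
# hypothetical; after the call, every entry's cache-control header reads
# 'public, max-age=315360000' (10 * 365 * 24 * 60 * 60 seconds).
PatchWpr('/tmp/recording.wpr')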
def BuildPatchedWpr():
  shutil.copyfile(
      self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
  wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)
  wpr_url_entries = wpr_archive.ListUrlEntries()
  for wpr_url_entry in wpr_url_entries:
    sandwich_utils.PatchWprEntryToBeCached(wpr_url_entry)
  logging.info('number of patched entries: %d', len(wpr_url_entries))
  wpr_archive.Persist()
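# PatchWprEntryToBeCached() lives in sandwich_utils; the sketch below only
# illustrates the per-entry logic it is assumed to factor out of PatchWpr(),
# using the url-entry methods already seen in this section. It is not the
# actual sandwich_utils implementation.
def PatchWprEntryToBeCached(wpr_url_entry):
  max_age = 10 * 365 * 24 * 60 * 60  # 10 years, as in PatchWpr().
  wpr_url_entry.SetResponseHeader(
      'cache-control', 'public, max-age={}'.format(max_age))
  # Strip Vary/Pragma directives that would make Chrome revalidate.
  wpr_url_entry.RemoveResponseHeaderDirectives('vary', {'*', 'cookie'})
  wpr_url_entry.RemoveResponseHeaderDirectives('pragma', {'no-cache'})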
def BuildPatchedWpr():
  common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
  shutil.copyfile(
      self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
  wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)

  # Save the original response headers.
  original_response_headers = {
      e.url: e.GetResponseHeadersDict() for e in wpr_archive.ListUrlEntries()}
  with open(self._original_headers_path, 'w') as file_output:
    json.dump(original_response_headers, file_output)

  # Patch WPR.
  _PatchWpr(wpr_archive)
  wpr_archive.Persist()
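# For illustration: the JSON written to self._original_headers_path maps each
# recorded URL to its original response headers, so a later step can compare
# them against the patched archive. The URL and header values below are
# hypothetical:
#
#   {
#     "https://example.com/style.css": {
#       "content-type": "text/css",
#       "cache-control": "no-cache"
#     }
#   }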
def PatchWpr(wpr_archive_path):
  """Patches a WPR archive to get all resources into the HTTP cache and avoid
  invalidations and revalidations.

  Args:
    wpr_archive_path: Path of the WPR archive to patch.
  """
  # Set the resources' cache max-age to 10 years.
  MAX_AGE = 10 * 365 * 24 * 60 * 60
  CACHE_CONTROL = 'public, max-age={}'.format(MAX_AGE)

  wpr_archive = wpr_backend.WprArchiveBackend(wpr_archive_path)
  for url_entry in wpr_archive.ListUrlEntries():
    response_headers = url_entry.GetResponseHeadersDict()
    if ('cache-control' in response_headers and
        response_headers['cache-control'] == CACHE_CONTROL):
      continue
    logging.info('patching %s', url_entry.url)

    # TODO(gabadie): May need to patch Last-Modified and If-Modified-Since.
    # TODO(gabadie): May need to delete ETag.
    # TODO(gabadie): May need to take care of x-cache.
    #
    # Override the cache-control header to set the resource's max-age to
    # MAX_AGE.
    #
    # Important note: some resources holding sensitive information might have
    # cache-control set to no-store, which allows the resource to be cached
    # in memory but not in the file system. NoState-Prefetch takes care of
    # this case, but here, to simulate NoState-Prefetch, we have no choice
    # but to save absolutely all cached resources on disk so they survive
    # Chrome being killed between cache save, modification and push.
    url_entry.SetResponseHeader('cache-control', CACHE_CONTROL)

    # TODO(gabadie): May need to extend the Vary blacklist (referer?).
    #
    # All of these Vary and Pragma possibilities need to be removed from the
    # response headers in order for Chrome to store a resource in the HTTP
    # cache and not invalidate it.
    #
    # Note: HttpVaryData::Init() in Chrome adds an implicit 'Vary: cookie'
    # header to any redirect.
    # TODO(gabadie): Find a way to work around this issue.
    url_entry.RemoveResponseHeaderDirectives('vary', {'*', 'cookie'})
    url_entry.RemoveResponseHeaderDirectives('pragma', {'no-cache'})
  wpr_archive.Persist()
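# RemoveResponseHeaderDirectives() belongs to wpr_backend's url entries; the
# standalone helper below only sketches its assumed semantics on a plain
# header dict: drop the listed directives from a comma-separated value and
# delete the header once no directive is left. It is hypothetical, not the
# wpr_backend implementation.
def _RemoveHeaderDirectives(headers, header_name, directives_to_remove):
  if header_name not in headers:
    return
  kept = [directive.strip() for directive in headers[header_name].split(',')
          if directive.strip().lower() not in directives_to_remove]
  if kept:
    headers[header_name] = ', '.join(kept)
  else:
    del headers[header_name]

# Example: {'vary': 'cookie, accept-encoding'} becomes
# {'vary': 'accept-encoding'} after removing {'cookie'}, and a header whose
# directives are all removed disappears entirely.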