def test_simple(self):
    result = http.UrlResult('http://example.com', {}, '', 200, None)
    assert discover_sourcemap(result) is None

    result = http.UrlResult('http://example.com', {
        'x-sourcemap': 'http://example.com/source.map.js',
    }, '', 200, None)
    assert discover_sourcemap(result) == 'http://example.com/source.map.js'

    result = http.UrlResult('http://example.com', {
        'sourcemap': 'http://example.com/source.map.js',
    }, '', 200, None)
    assert discover_sourcemap(result) == 'http://example.com/source.map.js'

    result = http.UrlResult(
        'http://example.com', {},
        '//@ sourceMappingURL=http://example.com/source.map.js\nconsole.log(true)',
        200, None)
    assert discover_sourcemap(result) == 'http://example.com/source.map.js'

    result = http.UrlResult(
        'http://example.com', {},
        '//# sourceMappingURL=http://example.com/source.map.js\nconsole.log(true)',
        200, None)
    assert discover_sourcemap(result) == 'http://example.com/source.map.js'

    result = http.UrlResult(
        'http://example.com', {},
        'console.log(true)\n//@ sourceMappingURL=http://example.com/source.map.js',
        200, None)
    assert discover_sourcemap(result) == 'http://example.com/source.map.js'

    result = http.UrlResult(
        'http://example.com', {},
        'console.log(true)\n//# sourceMappingURL=http://example.com/source.map.js',
        200, None)
    assert discover_sourcemap(result) == 'http://example.com/source.map.js'

    result = http.UrlResult(
        'http://example.com', {},
        'console.log(true)\n//# sourceMappingURL=http://example.com/source.map.js\n//# sourceMappingURL=http://example.com/source2.map.js',
        200, None)
    assert discover_sourcemap(result) == 'http://example.com/source2.map.js'

    result = http.UrlResult(
        'http://example.com', {},
        '//# sourceMappingURL=app.map.js/*ascii:lol*/',
        200, None)
    assert discover_sourcemap(result) == 'http://example.com/app.map.js'

    result = http.UrlResult('http://example.com', {}, '//# sourceMappingURL=/*lol*/', 200, None)
    with self.assertRaises(AssertionError):
        discover_sourcemap(result)
def test_distribution(self):
    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id, version="abc")
    release.add_project(project)

    foo_file = File.objects.create(
        name="file.min.js",
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    foo_file.putfile(six.BytesIO(b"foo"))
    foo_dist = release.add_dist("foo")
    ReleaseFile.objects.create(
        name="file.min.js",
        release=release,
        dist=foo_dist,
        organization_id=project.organization_id,
        file=foo_file,
    )

    bar_file = File.objects.create(
        name="file.min.js",
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    bar_file.putfile(six.BytesIO(b"bar"))
    bar_dist = release.add_dist("bar")
    ReleaseFile.objects.create(
        name="file.min.js",
        release=release,
        dist=bar_dist,
        organization_id=project.organization_id,
        file=bar_file,
    )

    foo_result = fetch_release_file("file.min.js", release, foo_dist)

    assert isinstance(foo_result.body, six.binary_type)
    assert foo_result == http.UrlResult(
        "file.min.js",
        {"content-type": "application/json; charset=utf-8"},
        b"foo",
        200,
        "utf-8",
    )

    # test that cache pays attention to dist value as well as name
    bar_result = fetch_release_file("file.min.js", release, bar_dist)

    # result is cached, but that's not what we should find
    assert bar_result != foo_result
    assert bar_result == http.UrlResult(
        "file.min.js",
        {"content-type": "application/json; charset=utf-8"},
        b"bar",
        200,
        "utf-8",
    )
def test_unicode(self):
    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id, version="abc")
    release.add_project(project)

    file = File.objects.create(
        name="file.min.js",
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    binary_body = unicode_body.encode("utf-8")
    file.putfile(six.BytesIO(binary_body))

    ReleaseFile.objects.create(
        name="file.min.js",
        release=release,
        organization_id=project.organization_id,
        file=file,
    )

    result = fetch_release_file("file.min.js", release)

    assert isinstance(result.body, six.binary_type)
    assert result == http.UrlResult(
        "file.min.js",
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )

    # looking again should hit the cache - make sure it's come through the
    # caching/uncaching process unscathed
    new_result = fetch_release_file("file.min.js", release)
    assert result == new_result
def test_fallbacks(self):
    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id,
        version='abc',
    )
    release.add_project(project)

    file = File.objects.create(
        name='~/file.min.js',
        type='release.file',
        headers={'Content-Type': 'application/json; charset=utf-8'},
    )
    binary_body = unicode_body.encode('utf-8')
    file.putfile(six.BytesIO(binary_body))

    ReleaseFile.objects.create(
        name='~/file.min.js',
        release=release,
        organization_id=project.organization_id,
        file=file,
    )

    result = fetch_release_file('http://example.com/file.min.js?lol', release)

    assert isinstance(result.body, six.binary_type)
    assert result == http.UrlResult(
        'http://example.com/file.min.js?lol',
        {'content-type': 'application/json; charset=utf-8'},
        binary_body,
        200,
        'utf-8',
    )
def test_unicode(self):
    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id, version="abc")
    release.add_project(project)

    file = File.objects.create(
        name="file.min.js",
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    binary_body = unicode_body.encode("utf-8")
    file.putfile(six.BytesIO(binary_body))

    ReleaseFile.objects.create(
        name="file.min.js",
        release=release,
        organization_id=project.organization_id,
        file=file,
    )

    result = fetch_release_file("file.min.js", release)

    assert isinstance(result.body, six.binary_type)
    assert result == http.UrlResult(
        "file.min.js",
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )

    # test with cache hit, which should be compressed
    new_result = fetch_release_file("file.min.js", release)
    assert result == new_result
def test_tilde(self):
    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id, version="abc")
    release.add_project(project)

    file = File.objects.create(
        name="~/file.min.js",
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    binary_body = unicode_body.encode("utf-8")
    file.putfile(six.BytesIO(binary_body))

    ReleaseFile.objects.create(
        name="~/file.min.js",
        release=release,
        organization_id=project.organization_id,
        file=file,
    )

    result = fetch_release_file("http://example.com/file.min.js?lol", release)

    assert isinstance(result.body, six.binary_type)
    assert result == http.UrlResult(
        "http://example.com/file.min.js?lol",
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )
def result_from_cache(filename, result):
    # Previous caches would be a 3-tuple instead of a 4-tuple,
    # so this is being maintained for backwards compatibility
    try:
        encoding = result[3]
    except IndexError:
        encoding = None

    return http.UrlResult(filename, result[0], zlib.decompress(result[1]), result[2], encoding)
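# Illustrative sketch (not part of the module above): the 3-tuple vs 4-tuple
# cache payloads that result_from_cache has to accept. The entries below are
# hypothetical stand-ins for real cached values.
import zlib

old_style_entry = ({"content-type": "text/javascript"}, zlib.compress(b"console.log(1)"), 200)
new_style_entry = old_style_entry + ("utf-8",)

for entry in (old_style_entry, new_style_entry):
    try:
        encoding = entry[3]  # only present in the newer 4-tuple format
    except IndexError:
        encoding = None      # legacy 3-tuple: no encoding was recorded
    assert zlib.decompress(entry[1]) == b"console.log(1)"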
def test_distribution(self):
    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id,
        version='abc',
    )
    release.add_project(project)

    other_file = File.objects.create(
        name='file.min.js',
        type='release.file',
        headers={'Content-Type': 'application/json; charset=utf-8'},
    )
    file = File.objects.create(
        name='file.min.js',
        type='release.file',
        headers={'Content-Type': 'application/json; charset=utf-8'},
    )
    binary_body = unicode_body.encode('utf-8')
    other_file.putfile(six.BytesIO(b''))
    file.putfile(six.BytesIO(binary_body))

    dist = release.add_dist('foo')

    ReleaseFile.objects.create(
        name='file.min.js',
        release=release,
        organization_id=project.organization_id,
        file=other_file,
    )
    ReleaseFile.objects.create(
        name='file.min.js',
        release=release,
        dist=dist,
        organization_id=project.organization_id,
        file=file,
    )

    result = fetch_release_file('file.min.js', release, dist)

    assert isinstance(result.body, six.binary_type)
    assert result == http.UrlResult(
        'file.min.js',
        {'content-type': 'application/json; charset=utf-8'},
        binary_body,
        200,
        'utf-8',
    )

    # test with cache hit, which should be compressed
    new_result = fetch_release_file('file.min.js', release, dist)
    assert result == new_result
def test_retry_file_open(self) -> None:
    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id, version="abc")
    release.add_project(project)

    content = b"foo"

    file = File.objects.create(
        name="file.min.js",
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    file.putfile(BytesIO(content))

    ReleaseFile.objects.create(
        name=file.name,
        release=release,
        organization_id=project.organization_id,
        file=file,
    )

    stale_file_error = OSError()
    stale_file_error.errno = errno.ESTALE

    bad_file = MagicMock()
    bad_file.chunks.side_effect = stale_file_error

    bad_file_reader = MagicMock()
    bad_file_reader.__enter__.return_value = bad_file

    good_file = MagicMock()
    good_file.chunks.return_value = iter([content])

    good_file_reader = MagicMock()
    good_file_reader.__enter__.return_value = good_file

    with patch("sentry.lang.javascript.processor.ReleaseFile.cache") as cache:
        cache.getfile.side_effect = [bad_file_reader, good_file_reader]

        assert fetch_release_file(file.name, release) == http.UrlResult(
            file.name,
            {k.lower(): v.lower() for k, v in file.headers.items()},
            content,
            200,
            "utf-8",
        )

    assert bad_file.chunks.call_count == 1
    assert good_file.chunks.call_count == 1
def test_non_url_with_release(self, mock_fetch_release_file):
    mock_fetch_release_file.return_value = http.UrlResult(
        "/example.js", {"content-type": "application/json"}, b"foo", 200, None
    )

    release = Release.objects.create(version="1", organization_id=self.project.organization_id)
    release.add_project(self.project)

    result = fetch_file("/example.js", release=release)
    assert result.url == "/example.js"
    assert result.body == b"foo"
    assert isinstance(result.body, six.binary_type)
    assert result.headers == {"content-type": "application/json"}
    assert result.encoding is None
def test_non_url_with_release(self, mock_fetch_release_file):
    mock_fetch_release_file.return_value = http.UrlResult(
        '/example.js',
        {'content-type': 'application/json'},
        'foo',
        200,
        None,
    )

    release = Release.objects.create(version='1', organization_id=self.project.organization_id)
    release.add_project(self.project)

    result = fetch_file('/example.js', release=release)
    assert result.url == '/example.js'
    assert result.body == 'foo'
    assert isinstance(result.body, six.binary_type)
    assert result.headers == {'content-type': 'application/json'}
    assert result.encoding is None
def fetch_and_cache_artifact(filename, fetch_fn, cache_key, cache_key_meta, headers, compress_fn):
    # If the release file is not in cache, check if we can retrieve at
    # least the size metadata from cache and prevent compression and
    # caching if payload exceeds the backend limit.
    z_body_size = None

    if CACHE_MAX_VALUE_SIZE:
        cache_meta = cache.get(cache_key_meta)
        if cache_meta:
            z_body_size = int(cache_meta.get("compressed_size"))

    def fetch_release_body():
        with fetch_fn() as fp:
            if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                return None, fp.read()
            else:
                return compress_fn(fp)

    try:
        with metrics.timer("sourcemaps.release_file_read"):
            z_body, body = fetch_retry_policy(fetch_release_body)
    except Exception:
        logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
        result = None
    else:
        headers = {k.lower(): v for k, v in headers.items()}
        encoding = get_encoding_from_headers(headers)
        result = http.UrlResult(filename, headers, body, 200, encoding)

        # If we don't have the compressed body for caching because the
        # cached metadata said the payload is too large for the cache
        # backend, do not attempt to cache.
        if z_body:
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

            # In case the previous call to cache implicitly fails, we use
            # the meta data to avoid pointless compression which is done
            # only for caching.
            cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600)

    return result
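# Illustrative sketch, not part of the module above: the contract that
# fetch_and_cache_artifact assumes. fetch_fn() returns a context manager
# yielding a file-like object, and compress_fn(fp) returns a
# (compressed_body, raw_body) tuple. open_artifact and compress_body are
# hypothetical names used only for this example.
import io
import zlib
from contextlib import contextmanager


@contextmanager
def open_artifact():
    # stands in for something like a ReleaseFile.cache.getfile(...) call
    yield io.BytesIO(b"console.log(1)")


def compress_body(fp):
    body = fp.read()
    return zlib.compress(body), body


with open_artifact() as fp:
    z_body, body = compress_body(fp)
assert zlib.decompress(z_body) == body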
def test_caching(self):
    # Set the threshold to zero to force caching on the file system
    options.set("releasefile.cache-limit", 0)

    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id, version="abc")
    release.add_project(project)

    file = File.objects.create(
        name="file.min.js",
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    binary_body = unicode_body.encode("utf-8")
    file.putfile(BytesIO(binary_body))

    ReleaseFile.objects.create(
        name="file.min.js",
        release_id=release.id,
        organization_id=project.organization_id,
        file=file,
    )

    result = fetch_release_file("file.min.js", release)

    assert isinstance(result.body, bytes)
    assert result == http.UrlResult(
        "file.min.js",
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )

    # test with cache hit, coming from the FS
    new_result = fetch_release_file("file.min.js", release)
    assert result == new_result
def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the database first (assuming there's a release on the
    event), then the internet. Caches the result of each of those two attempts
    separately, whether or not those attempts are successful. Used for both
    source files and source maps.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == "...":
        raise http.CannotFetch({
            "type": EventError.JS_MISSING_SOURCE,
            "url": http.expose_url(url),
        })

    # if we've got a release to look on, try that first (incl associated cache)
    if release:
        with metrics.timer("sourcemaps.release_file"):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    # otherwise, try the web-scraping cache and then the web itself
    cache_key = f"source:cache:v4:{md5_text(url).hexdigest()}"

    if result is None:
        if not allow_scraping or not url.startswith(("http:", "https:")):
            error = {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(url)}
            raise http.CannotFetch(error)

        logger.debug("Checking cache for url %r", url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 4-tuple instead of a 5-tuple,
            # so this is being maintained for backwards compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(
                result[0], result[1], zlib.decompress(result[2]), result[3], encoding
            )

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option("sentry:verify_ssl", False))
            token = project.get_option("sentry:token")
            if token:
                token_header = project.get_option("sentry:token_header") or "X-Sentry-Token"
                headers[token_header] = token

        with metrics.timer("sourcemaps.fetch"):
            result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl)
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url, result.headers, z_body, result.status, result.encoding),
                get_max_age(result.headers),
            )

            # since the cache.set above can fail we can end up in a situation
            # where the file is too large for the cache. In that case we abort
            # the fetch and cache a failure and lock the domain for future
            # http fetches.
            if cache.get(cache_key) is None:
                error = {
                    "type": EventError.TOO_LARGE_FOR_CACHE,
                    "url": http.expose_url(url),
                }
                http.lock_domain(url, error=error)
                raise http.CannotFetch(error)

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch({
            "type": EventError.FETCH_INVALID_HTTP_CODE,
            "value": result.status,
            "url": http.expose_url(url),
        })

    # Make sure the file we're getting back is bytes. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, bytes):
        try:
            result = http.UrlResult(
                result.url,
                result.headers,
                result.body.encode("utf8"),
                result.status,
                result.encoding,
            )
        except UnicodeEncodeError:
            error = {
                "type": EventError.FETCH_INVALID_ENCODING,
                "value": "utf8",
                "url": http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if urlsplit(url).path.endswith(".js"):
        # Check if response is HTML by looking if the first non-whitespace
        # character is an open tag ('<'). This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        if body_start[:1] == b"<":
            error = {"type": EventError.JS_INVALID_CONTENT, "url": url}
            raise http.CannotFetch(error)

    return result
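# Illustrative sketch, not part of the module above: the HTML guard at the
# end of fetch_file, exercised against raw byte strings. The bodies below are
# hypothetical examples.
html_body = b"  \n<!doctype html><html></html>"
js_body = b"console.log(true)"

for body in (html_body, js_body):
    body_start = body[:20].lstrip()  # discard leading whitespace before a doctype
    looks_like_html = body_start[:1] == b"<"
    assert looks_like_html == (body is html_body)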
def fetch_release_file(filename, release, dist=None):
    """
    Attempt to retrieve a release artifact from the database.

    Caches the result of that attempt (whether successful or not).
    """
    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(filename, dist_name)
    cache_key = get_release_file_cache_key(
        release_id=release.id, releasefile_ident=releasefile_ident)

    # Cache key to store file metadata, currently only the size of the
    # compressed version of the file. We cannot use the cache_key because large
    # payloads (silently) fail to cache due to e.g. memcached payload size
    # limitation and we use the metadata to avoid compression of such files.
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident)

    logger.debug("Checking cache for release artifact %r (release_id=%s)",
                 filename, release.id)
    result = cache.get(cache_key)

    # not in the cache (meaning we haven't checked the database recently), so check the database
    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id)

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file"))

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)",
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                rf for ident in filename_idents
                for rf in possible_files if rf.ident == ident)

        logger.debug("Found release artifact %r (id=%s, release_id=%s)",
                     filename, releasefile.id, release.id)

        # If the release file is not in cache, check if we can retrieve at
        # least the size metadata from cache and prevent compression and
        # caching if payload exceeds the backend limit.
        z_body_size = None

        if CACHE_MAX_VALUE_SIZE:
            cache_meta = cache.get(cache_key_meta)
            if cache_meta:
                z_body_size = int(cache_meta.get("compressed_size"))

        def fetch_release_body():
            with ReleaseFile.cache.getfile(releasefile) as fp:
                if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                    return None, fp.read()
                else:
                    return compress_file(fp)

        try:
            with metrics.timer("sourcemaps.release_file_read"):
                z_body, body = fetch_retry_policy(fetch_release_body)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)

            # If we don't have the compressed body for caching because the
            # cached metadata said the payload is too large for the cache
            # backend, do not attempt to cache.
            if z_body:
                # This will implicitly skip too large payloads. Those will be cached
                # on the file system by `ReleaseFile.cache`, instead.
                cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

                # In case the previous call to cache implicitly fails, we use
                # the meta data to avoid pointless compression which is done
                # only for caching.
                cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600)

    # in the cache as an unsuccessful attempt
    elif result == -1:
        result = None

    # in the cache as a successful attempt, including the zipped contents of the file
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
def test_compression(self, mock_compress_file):
    """
    For files larger than max memcached payload size we want to avoid
    pointless compression and caching attempt since it fails silently.

    Tests scenarios:
    - happy path where compressed file is successfully cached
    - compressed payload is too large to cache and we will avoid
      compression and caching while the metadata cache exists
    """
    project = self.project
    release = Release.objects.create(
        organization_id=project.organization_id, version="abc")
    release.add_project(project)

    filename = "file.min.js"
    file = File.objects.create(
        name=filename,
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    binary_body = unicode_body.encode("utf-8")
    file.putfile(BytesIO(binary_body))

    ReleaseFile.objects.create(
        name="file.min.js",
        release=release,
        organization_id=project.organization_id,
        file=file,
    )

    mock_compress_file.return_value = (binary_body, binary_body)

    releasefile_ident = ReleaseFile.get_ident(filename, None)
    cache_key = get_release_file_cache_key(
        release_id=release.id, releasefile_ident=releasefile_ident)
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident)

    fetch_release_file(filename, release)

    # Here the ANY is File() retrieved from cache/db
    assert mock_compress_file.mock_calls == [call(ANY)]
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key)

    # Remove cache and check that calling fetch_release_file will do the
    # compression and caching again
    cache.set(cache_key, None)
    mock_compress_file.reset_mock()

    fetch_release_file(filename, release)

    assert mock_compress_file.mock_calls == [call(ANY)]
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key)

    # If the file is bigger than the max cache value threshold, avoid
    # compression and caching
    cache.set(cache_key, None)
    mock_compress_file.reset_mock()
    with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE", len(binary_body) - 1):
        result = fetch_release_file(filename, release)

    assert result == http.UrlResult(
        filename,
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )
    assert mock_compress_file.mock_calls == []
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key) is None

    # If the file is bigger than the max cache value threshold, but the
    # metadata cache is empty as well, compress and attempt to cache anyway
    cache.set(cache_key, None)
    cache.set(cache_key_meta, None)
    mock_compress_file.reset_mock()
    with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE", len(binary_body) - 1):
        result = fetch_release_file(filename, release)

    assert result == http.UrlResult(
        filename,
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )
    assert mock_compress_file.mock_calls == [call(ANY)]
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key)

    # If the file is smaller than the max cache value threshold, but the
    # cache is empty, compress and cache
    cache.set(cache_key, None)
    mock_compress_file.reset_mock()
    with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE", len(binary_body) + 1):
        result = fetch_release_file(filename, release)

    assert result == http.UrlResult(
        filename,
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )
    assert mock_compress_file.mock_calls == [call(ANY)]
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key)
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest(), )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf for ident in filename_idents for rf in possible_files if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id,
            release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
def fetch_release_file(filename, release, dist=None):
    dist_name = dist and dist.name or None
    cache_key = "releasefile:v1:%s:%s" % (release.id, ReleaseFile.get_ident(filename, dist_name))

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file")
        )

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)", filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                (rf for ident in filename_idents for rf in possible_files if rf.ident == ident)
            )

        logger.debug(
            "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id
        )
        try:
            with metrics.timer("sourcemaps.release_file_read"):
                with ReleaseFile.cache.getfile(releasefile) as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
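# Illustrative sketch, not part of the module above: the "first ident that
# matches in priority order" selection used by fetch_release_file, shown with
# plain strings instead of ReleaseFile rows. The ident values are hypothetical
# examples of what the normalized filename fallbacks might yield.
filename_idents = [
    "http://example.com/file.min.js",  # full url, highest priority
    "~/file.min.js",                   # hostless fallback
]
possible_idents = ["~/file.min.js"]  # idents of the rows found in the database

chosen = next(
    rf for ident in filename_idents for rf in possible_idents if rf == ident
)
assert chosen == "~/file.min.js"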
def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise http.CannotFetch(
            {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
        )

    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    cache_key = 'source:cache:v4:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 4-tuple instead of a 5-tuple,
            # so this is being maintained for backwards compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(
                result[0], result[1], zlib.decompress(result[2]), result[3], encoding
            )

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option('sentry:verify_ssl', False))
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option('sentry:token_header') or 'X-Sentry-Token'
                headers[token_header] = token

        with metrics.timer('sourcemaps.fetch'):
            result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl)
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key, (url, result.headers, z_body, result.status, result.encoding),
                get_max_age(result.headers))

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch(
            {
                'type': EventError.FETCH_INVALID_HTTP_CODE,
                'value': result.status,
                'url': http.expose_url(url),
            }
        )

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(
                result.url, result.headers, result.body.encode('utf8'), result.status,
                result.encoding
            )
        except UnicodeEncodeError:
            error = {
                'type': EventError.FETCH_INVALID_ENCODING,
                'value': 'utf8',
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace
        # character is an open tag ('<'). This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        if body_start[:1] == u'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise http.CannotFetch(error)

    return result
def test_simple(self): result = http.UrlResult("http://example.com", {}, "", 200, None) assert discover_sourcemap(result) is None result = http.UrlResult( "http://example.com", {"x-sourcemap": "http://example.com/source.map.js"}, "", 200, None) assert discover_sourcemap(result) == "http://example.com/source.map.js" result = http.UrlResult( "http://example.com", {"sourcemap": "http://example.com/source.map.js"}, "", 200, None) assert discover_sourcemap(result) == "http://example.com/source.map.js" result = http.UrlResult( "http://example.com", {}, "//@ sourceMappingURL=http://example.com/source.map.js\nconsole.log(true)", 200, None, ) assert discover_sourcemap(result) == "http://example.com/source.map.js" result = http.UrlResult( "http://example.com", {}, "//# sourceMappingURL=http://example.com/source.map.js\nconsole.log(true)", 200, None, ) assert discover_sourcemap(result) == "http://example.com/source.map.js" result = http.UrlResult( "http://example.com", {}, "console.log(true)\n//@ sourceMappingURL=http://example.com/source.map.js", 200, None, ) assert discover_sourcemap(result) == "http://example.com/source.map.js" result = http.UrlResult( "http://example.com", {}, "console.log(true)\n//# sourceMappingURL=http://example.com/source.map.js", 200, None, ) assert discover_sourcemap(result) == "http://example.com/source.map.js" result = http.UrlResult( "http://example.com", {}, "console.log(true)\n//# sourceMappingURL=http://example.com/source.map.js\n//# sourceMappingURL=http://example.com/source2.map.js", 200, None, ) assert discover_sourcemap( result) == "http://example.com/source2.map.js" # sourceMappingURL found directly after code w/o newline result = http.UrlResult( "http://example.com", {}, "console.log(true);//# sourceMappingURL=http://example.com/source.map.js", 200, None, ) assert discover_sourcemap(result) == "http://example.com/source.map.js" result = http.UrlResult( "http://example.com", {}, "//# sourceMappingURL=app.map.js/*ascii:lol*/", 200, None) assert discover_sourcemap(result) == "http://example.com/app.map.js" result = http.UrlResult("http://example.com", {}, "//# sourceMappingURL=/*lol*/", 200, None) with self.assertRaises(AssertionError): discover_sourcemap(result)