def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5(filename).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file', 'file__blob').get()
        except ReleaseFile.DoesNotExist:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(unicode(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            # Write the compressed version to cache, but return the deflated version
            cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
            result = (releasefile.file.headers, body, 200)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        result = (result[0], body, result[2])

    return result
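Every version of this function delegates to a compress_file helper that is not shown in this listing. Its contract, evidenced by the zlib.decompress on the cache-hit path, is to return both the zlib-compressed bytes (for the cache) and the raw bytes (for the caller). A minimal sketch under that assumption; the single-pass read and default compression level are illustrative, not the actual implementation:

import zlib

def compress_file(fp, level=6):
    # Read the file once, then return (compressed_bytes, raw_bytes) so the
    # caller can cache the small form and hand back the original.
    body = fp.read()
    return zlib.compress(body, level), body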
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5(filename).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(unicode(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            # Write the compressed version to cache, but return the deflated version
            cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
            result = (releasefile.file.headers, body, 200)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        result = (result[0], body, result[2])

    return result
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)',
            filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
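ReleaseFile.normalize above fans one requested URL out into the several candidate idents the lookup relies on. The real method lives on the model; a hypothetical stand-in matching the behavior the lookup code assumes (up to four variants, in priority order) might look like this:

from urllib.parse import urlparse

def normalize_filename(filename):
    # Hypothetical stand-in for ReleaseFile.normalize: the full URL, the
    # full URL without its query string, then the hostless '~' form with
    # and without the query string, in priority order.
    parsed = urlparse(filename)
    hostless = '~' + parsed.path
    choices = [filename]
    if parsed.query:
        choices.append(filename.split('?', 1)[0])
        choices.append(hostless + '?' + parsed.query)
    choices.append(hostless)
    return choices

# normalize_filename('http://example.com/app.js?v=2')
# => ['http://example.com/app.js?v=2', 'http://example.com/app.js',
#     '~/app.js?v=2', '~/app.js']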
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next(
                (f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {
                k.lower(): v for k, v in releasefile.file.headers.items()
            }
            encoding = get_encoding_from_headers(headers)
            result = (headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)',
            filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error('sourcemap.compress_read_failed', exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next(
                (f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            try:
                result = (releasefile.file.headers, body.decode('utf-8'), 200)
            except UnicodeDecodeError:
                error = {
                    'type': EventError.JS_INVALID_SOURCE_ENCODING,
                    'value': 'utf8',
                    'url': expose_url(releasefile.name),
                }
                raise CannotFetchSource(error)
            else:
                # Write the compressed version to cache, but return the deflated version
                cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        try:
            result = (result[0], body.decode('utf-8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                # `releasefile` is never bound on the cache-hit path, so the
                # requested filename is exposed instead (fixes a NameError).
                'url': expose_url(filename),
            }
            raise CannotFetchSource(error)

    return result
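CannotFetchSource and EventError are internal to the codebase and not shown in this listing. A minimal assumed shape for the exception, just enough to make the raise above concrete; the attribute name is a guess, not the actual class:

class CannotFetchSource(Exception):
    # Assumed shape: carries the structured error dict so the caller can
    # attach it to the event as a processing error.
    def __init__(self, data):
        super(CannotFetchSource, self).__init__(data.get('type'))
        self.data = data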
def fetch_release_body():
    # Nested helper: reads the artifact through the file-system cache and
    # skips zlib compression entirely when cached metadata says the
    # compressed payload would exceed the cache backend's size limit.
    with ReleaseFile.cache.getfile(releasefile) as fp:
        if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
            return None, fp.read()
        else:
            return compress_file(fp)
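This fragment cannot run on its own: releasefile, z_body_size, and CACHE_MAX_VALUE_SIZE are free variables, so it only makes sense as a closure nested inside fetch_release_file. A sketch of the assumed call site in the enclosing function, not the actual code:

# Assumed call site inside fetch_release_file; fetch_release_body closes
# over releasefile, z_body_size, and CACHE_MAX_VALUE_SIZE bound just above.
try:
    with metrics.timer("sourcemaps.release_file_read"):
        # z_body comes back as None when the payload is too large to cache.
        z_body, body = fetch_release_body()
except Exception:
    logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
    result = None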
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(ReleaseFile.objects.filter(
            release=release,
            ident__in=filename_idents,
        ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            try:
                result = (releasefile.file.headers, body.decode('utf-8'), 200)
            except UnicodeDecodeError:
                error = {
                    'type': EventError.JS_INVALID_SOURCE_ENCODING,
                    'value': 'utf8',
                    'url': expose_url(releasefile.name),
                }
                raise CannotFetchSource(error)
            else:
                # Write the compressed version to cache, but return the deflated version
                cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        try:
            result = (result[0], body.decode('utf-8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                # `releasefile` is never bound on the cache-hit path, so the
                # requested filename is exposed instead (fixes a NameError).
                'url': expose_url(filename),
            }
            raise CannotFetchSource(error)

    return result
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(ReleaseFile.objects.filter(
            release=release,
            ident__in=filename_idents,
        ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = (headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
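get_encoding_from_headers here is presumably the requests helper (requests.utils.get_encoding_from_headers), which is why the headers are lower-cased before the call. Its behavior, for reference:

from requests.utils import get_encoding_from_headers

# The charset comes from the content-type header when one is declared...
assert get_encoding_from_headers({'content-type': 'text/html; charset=utf-8'}) == 'utf-8'
# ...and bare text/* falls back to ISO-8859-1, per RFC 2616.
assert get_encoding_from_headers({'content-type': 'text/plain'}) == 'ISO-8859-1'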
def fetch_release_file(filename, release, dist=None):
    dist_name = dist and dist.name or None
    cache_key = "releasefile:v1:%s:%s" % (release.id, ReleaseFile.get_ident(filename, dist_name))

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file")
        )

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)", filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                (rf for ident in filename_idents for rf in possible_files if rf.ident == ident)
            )

        logger.debug(
            "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id
        )
        try:
            with metrics.timer("sourcemaps.release_file_read"):
                with ReleaseFile.cache.getfile(releasefile) as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
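The read path changes here from releasefile.file.getfile() to ReleaseFile.cache.getfile(releasefile), which suggests artifacts are now served through a local file-system cache. A minimal sketch of that kind of wrapper; the class name, directory layout, and keying by file id are all assumptions, not the real implementation:

import os
import shutil

class ReleaseFileCache(object):
    def __init__(self, cache_path="/tmp/releasefile-cache"):
        self.cache_path = cache_path

    def getfile(self, releasefile):
        # Serve from the local copy when present; otherwise pull the blob
        # down once and reuse it for subsequent reads.
        path = os.path.join(self.cache_path, str(releasefile.file.id))
        if not os.path.exists(path):
            os.makedirs(self.cache_path, exist_ok=True)
            with releasefile.file.getfile() as src, open(path, "wb") as dst:
                shutil.copyfileobj(src, dst)
        return open(path, "rb")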
def fetch_release_file(filename, release, dist=None): """ Attempt to retrieve a release artifact from the database. Caches the result of that attempt (whether successful or not). """ dist_name = dist and dist.name or None releasefile_ident = ReleaseFile.get_ident(filename, dist_name) cache_key = get_release_file_cache_key( release_id=release.id, releasefile_ident=releasefile_ident ) # Cache key to store file metadata, currently only the size of the # compressed version of file. We cannot use the cache_key because large # payloads (silently) fail to cache due to e.g. memcached payload size # limitation and we use the meta data to avoid compression of such a files. cache_key_meta = get_release_file_cache_key_meta( release_id=release.id, releasefile_ident=releasefile_ident ) logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id) result = cache.get(cache_key) # not in the cache (meaning we haven't checked the database recently), so check the database if result is None: filename_choices = ReleaseFile.normalize(filename) filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices] logger.debug( "Checking database for release artifact %r (release_id=%s)", filename, release.id ) possible_files = list( ReleaseFile.objects.filter( release=release, dist=dist, ident__in=filename_idents ).select_related("file") ) if len(possible_files) == 0: logger.debug( "Release artifact %r not found in database (release_id=%s)", filename, release.id ) cache.set(cache_key, -1, 60) return None elif len(possible_files) == 1: releasefile = possible_files[0] else: # Pick first one that matches in priority order. # This is O(N*M) but there are only ever at most 4 things here # so not really worth optimizing. releasefile = next( rf for ident in filename_idents for rf in possible_files if rf.ident == ident ) # If the release file is not in cache, check if we can retrieve at # least the size metadata from cache and prevent compression and # caching if payload exceeds the backend limit. z_body = None z_body_size = None if CACHE_MAX_VALUE_SIZE: cache_meta = cache.get(cache_key_meta) if cache_meta: z_body_size = int(cache_meta.get("compressed_size")) logger.debug( "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id ) try: with metrics.timer("sourcemaps.release_file_read"): with ReleaseFile.cache.getfile(releasefile) as fp: if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE: body = fp.read() else: z_body, body = compress_file(fp) except Exception: logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info()) result = None else: headers = {k.lower(): v for k, v in releasefile.file.headers.items()} encoding = get_encoding_from_headers(headers) result = http.UrlResult(filename, headers, body, 200, encoding) # If we don't have the compressed body for caching because the # cached metadata said it is too large payload for the cache # backend, do not attempt to cache. if z_body: # This will implicitly skip too large payloads. Those will be cached # on the file system by `ReleaseFile.cache`, instead. cache.set(cache_key, (headers, z_body, 200, encoding), 3600) # In case the previous call to cache implicitly fails, we use # the meta data to avoid pointless compression which is done # only for caching. 
cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600) # in the cache as an unsuccessful attempt elif result == -1: result = None # in the cache as a successful attempt, including the zipped contents of the file else: # Previous caches would be a 3-tuple instead of a 4-tuple, # so this is being maintained for backwards compatibility try: encoding = result[3] except IndexError: encoding = None result = http.UrlResult( filename, result[0], zlib.decompress(result[1]), result[2], encoding ) return result
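Usage, for the final version above: the function returns None on a miss or a cached failure, and an http.UrlResult on a hit. A hedged example, assuming UrlResult exposes the body and encoding fields passed to its constructor; handle_missing_artifact is a hypothetical caller-side placeholder:

result = fetch_release_file("~/static/js/app.js.map", release, dist=dist)
if result is None:
    # Not uploaded for this release, or the failed lookup is still cached.
    handle_missing_artifact()  # hypothetical fallback, not part of the API
else:
    # body is raw bytes; decode with the detected encoding when available.
    sourcemap = result.body.decode(result.encoding or "utf-8")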