def get_from_archive(url: str, archive: ReleaseArchive) -> Tuple[bytes, dict]:
    candidates = ReleaseFile.normalize(url)
    for candidate in candidates:
        try:
            return archive.get_file_by_url(candidate)
        except KeyError:
            pass

    # None of the filenames matched
    raise KeyError(f"Not found in archive: '{url}'")
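# A minimal, self-contained sketch of what `ReleaseFile.normalize` is assumed to do
# (the real method lives on the ReleaseFile model and is not shown here): expand one
# absolute URL into the candidate keys an artifact may have been uploaded under --
# the URL itself, the URL without its query string, and "~"-prefixed host-relative
# variants. This would also explain the "at most 4 things" comment in the lookup
# code below.
from urllib.parse import urlsplit, urlunsplit


def normalize_url_sketch(url):
    scheme, netloc, path, query, _ = urlsplit(url)  # the fragment is always ignored
    candidates = [urlunsplit((scheme, netloc, path, query, ""))]
    if query:
        candidates.append(urlunsplit((scheme, netloc, path, "", "")))
    candidates.append("~" + urlunsplit(("", "", path, query, "")))
    if query:
        candidates.append("~" + urlunsplit(("", "", path, "", "")))
    return candidates


# normalize_url_sketch("https://example.com/static/app.js?v=abc123") ->
# ["https://example.com/static/app.js?v=abc123",
#  "https://example.com/static/app.js",
#  "~/static/app.js?v=abc123",
#  "~/static/app.js"]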
def get_index_entry(release, dist, url) -> Optional[dict]:
    try:
        index = get_artifact_index(release, dist)
    except Exception as exc:
        logger.error("sourcemaps.index_read_failed", exc_info=exc)
        return None

    if index:
        for candidate in ReleaseFile.normalize(url):
            entry = index.get("files", {}).get(candidate)
            if entry:
                return entry

    return None
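# Assumed shape of the index returned by `get_artifact_index` (inferred from the
# lookup in `get_index_entry` above; the field names inside each entry are
# hypothetical): a dict whose "files" key maps normalized URLs to entries
# describing where the artifact lives.
EXAMPLE_ARTIFACT_INDEX = {
    "files": {
        "~/static/app.js": {
            # hypothetical entry fields -- whatever the caller needs to locate the file
            "archive_ident": "release-artifacts-0",
            "headers": {"content-type": "application/javascript"},
            "size": 12345,
        },
    },
}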
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                rf for ident in filename_idents for rf in possible_files if rf.ident == ident
            )

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )

        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize it down to None
        result = None

    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
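# Every version of `fetch_release_file` relies on `compress_file` returning a
# (compressed, raw) pair: the raw body is served immediately, while the
# zlib-compressed copy is what goes into the cache (and is inflated with
# `zlib.decompress` on a cache hit). A minimal sketch of that contract, assuming
# the real helper may differ (e.g. by streaming the file in chunks):
import zlib


def compress_file_sketch(fp, level=6):
    body = fp.read()
    return zlib.compress(body, level), body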
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                rf for ident in filename_idents for rf in possible_files if rf.ident == ident
            )

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )

        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error('sourcemap.compress_read_failed', exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize it down to None
        result = None

    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
def fetch_release_file(filename, release, dist=None):
    """
    Attempt to retrieve a release artifact from the database.

    Caches the result of that attempt (whether successful or not).
    """
    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(filename, dist_name)
    cache_key = get_release_file_cache_key(
        release_id=release.id, releasefile_ident=releasefile_ident
    )
    # Cache key to store file metadata, currently only the size of the
    # compressed version of the file. We cannot reuse cache_key because large
    # payloads (silently) fail to cache due to e.g. memcached payload size
    # limitations, and we use the metadata to avoid compressing such files.
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident
    )

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    # not in the cache (meaning we haven't checked the database recently), so check the database
    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file")
        )

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)", filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                rf for ident in filename_idents for rf in possible_files if rf.ident == ident
            )

        logger.debug(
            "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id
        )

        # If the release file is not in cache, check if we can retrieve at
        # least the size metadata from cache and prevent compression and
        # caching if the payload exceeds the backend limit.
        z_body_size = None

        if CACHE_MAX_VALUE_SIZE:
            cache_meta = cache.get(cache_key_meta)
            if cache_meta:
                z_body_size = int(cache_meta.get("compressed_size"))

        def fetch_release_body():
            with ReleaseFile.cache.getfile(releasefile) as fp:
                if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                    return None, fp.read()
                else:
                    return compress_file(fp)

        try:
            with metrics.timer("sourcemaps.release_file_read"):
                z_body, body = fetch_retry_policy(fetch_release_body)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)

            # If we don't have the compressed body for caching because the
            # cached metadata said the payload is too large for the cache
            # backend, do not attempt to cache.
            if z_body:
                # This will implicitly skip too large payloads. Those will be
                # cached on the file system by `ReleaseFile.cache`, instead.
                cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

                # In case the previous cache call implicitly fails, we use the
                # metadata to avoid pointless compression, which is done only
                # for caching.
                cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600)

    # in the cache as an unsuccessful attempt
    elif result == -1:
        result = None

    # in the cache as a successful attempt, including the zipped contents of the file
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
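# Plausible sketches of the cache-key helpers introduced above, assumed from the
# inline "releasefile:v1:%s:%s" key format used by the other versions (not verified
# against the real module): the meta key simply namespaces the primary key so the
# size metadata can be stored under a separate, small entry.
def get_release_file_cache_key_sketch(release_id, releasefile_ident):
    return "releasefile:v1:%s:%s" % (release_id, releasefile_ident)


def get_release_file_cache_key_meta_sketch(release_id, releasefile_ident):
    return "meta:%s" % get_release_file_cache_key_sketch(release_id, releasefile_ident)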
def fetch_release_file(filename, release, dist=None):
    dist_name = dist and dist.name or None
    cache_key = "releasefile:v1:%s:%s" % (release.id, ReleaseFile.get_ident(filename, dist_name))

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file")
        )

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)", filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                (rf for ident in filename_idents for rf in possible_files if rf.ident == ident)
            )

        logger.debug(
            "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id
        )

        try:
            with metrics.timer("sourcemaps.release_file_read"):
                with ReleaseFile.cache.getfile(releasefile) as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize it down to None
        result = None

    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
def fetch_release_file(filename, release, dist=None):
    """
    Attempt to retrieve a release artifact from the database.

    Caches the result of that attempt (whether successful or not).
    """
    dist_name = dist and dist.name or None
    cache_key, cache_key_meta = get_cache_keys(filename, release, dist)

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    # not in the cache (meaning we haven't checked the database recently), so check the database
    if result is None:
        with metrics.timer("sourcemaps.release_artifact_from_file"):
            filename_choices = ReleaseFile.normalize(filename)
            filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

            logger.debug(
                "Checking database for release artifact %r (release_id=%s)", filename, release.id
            )

            possible_files = list(
                ReleaseFile.objects.filter(
                    release_id=release.id,
                    dist_id=dist.id if dist else dist,
                    ident__in=filename_idents,
                ).select_related("file")
            )

            if len(possible_files) == 0:
                logger.debug(
                    "Release artifact %r not found in database (release_id=%s)",
                    filename,
                    release.id,
                )
                cache.set(cache_key, -1, 60)
                return None
            elif len(possible_files) == 1:
                releasefile = possible_files[0]
            else:
                # Pick first one that matches in priority order.
                # This is O(N*M) but there are only ever at most 4 things here
                # so not really worth optimizing.
                releasefile = next(
                    rf for ident in filename_idents for rf in possible_files if rf.ident == ident
                )

            logger.debug(
                "Found release artifact %r (id=%s, release_id=%s)",
                filename,
                releasefile.id,
                release.id,
            )

            result = fetch_and_cache_artifact(
                filename,
                lambda: ReleaseFile.cache.getfile(releasefile),
                cache_key,
                cache_key_meta,
                releasefile.file.headers,
                compress_file,
            )

    # in the cache as an unsuccessful attempt
    elif result == -1:
        result = None

    # in the cache as a successful attempt, including the zipped contents of the file
    else:
        result = result_from_cache(filename, result)

    return result
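# This final version factors the read/compress/cache logic and the cache-hit
# decoding into `fetch_and_cache_artifact` and `result_from_cache`. Sketches of
# what those helpers presumably contain, reconstructed from the bodies of the
# earlier versions above (they reuse the same module-level names -- cache,
# metrics, logger, http, fetch_retry_policy, CACHE_MAX_VALUE_SIZE -- and are not
# verified against the real module):

def fetch_and_cache_artifact_sketch(filename, fetch_fn, cache_key, cache_key_meta, headers, compress_fn):
    # Consult the size metadata first so oversized payloads skip pointless compression.
    z_body_size = None
    if CACHE_MAX_VALUE_SIZE:
        cache_meta = cache.get(cache_key_meta)
        if cache_meta:
            z_body_size = int(cache_meta.get("compressed_size"))

    def fetch_file():
        with fetch_fn() as fp:
            if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                return None, fp.read()
            return compress_fn(fp)

    try:
        with metrics.timer("sourcemaps.release_file_read"):
            z_body, body = fetch_retry_policy(fetch_file)
    except Exception:
        logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
        return None

    headers = {k.lower(): v for k, v in headers.items()}
    encoding = get_encoding_from_headers(headers)
    result = http.UrlResult(filename, headers, body, 200, encoding)
    # Only cache when we actually have a compressed body small enough to store,
    # and record its size so future calls can skip compression entirely.
    if z_body:
        cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
        cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600)
    return result


def result_from_cache_sketch(filename, result):
    # Older cache entries were 3-tuples (no encoding), so stay backwards compatible.
    try:
        encoding = result[3]
    except IndexError:
        encoding = None
    return http.UrlResult(filename, result[0], zlib.decompress(result[1]), result[2], encoding)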