def _open_stream(formatter, tag, schema1_manifest, derived_image_id, handlers, reporter):
    """
    This method generates a stream of data which will be replicated and read from the queue files.

    This method runs in a separate process.
    """
    # For performance reasons, we load the full image list here, cache it, then disconnect from
    # the database.
    with database.UseThenDisconnect(app.config):
        layers = registry_model.list_parsed_manifest_layers(
            tag.repository, schema1_manifest, storage, include_placements=True
        )

    def image_stream_getter(store, blob):
        def get_stream_for_storage():
            current_image_stream = store.stream_read_file(blob.placements, blob.storage_path)
            logger.debug("Returning blob %s: %s", blob.digest, blob.storage_path)
            return current_image_stream

        return get_stream_for_storage

    def tar_stream_getter_iterator():
        # Re-initialize the storage engine because some may not respond well to forking (e.g. S3)
        store = Storage(app, config_provider=config_provider, ip_resolver=ip_resolver)

        # Note: We reverse because we have to start at the leaf layer and move upward,
        # as per the spec for the formatters.
        for layer in reversed(layers):
            yield image_stream_getter(store, layer.blob)

    stream = formatter.build_stream(
        tag,
        schema1_manifest,
        derived_image_id,
        layers,
        tar_stream_getter_iterator,
        reporter=reporter,
    )

    for handler_fn in handlers:
        stream = wrap_with_handler(stream, handler_fn)

    return stream.read
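# --- Illustrative sketch (assumption, not the project's wrap_with_handler) ---------------------
# Both _open_stream above and upload_chunk below chain per-read handlers onto a file-like stream
# via wrap_with_handler. A minimal version of that pattern, assuming a handler is any callable
# that accepts the raw bytes of each read (for example, a hashlib object's update method):


class _HandlerWrappedStream(object):
    """Hypothetical read-through wrapper: every chunk read is also passed to handler_fn."""

    def __init__(self, fileobj, handler_fn):
        self._fileobj = fileobj
        self._handler_fn = handler_fn

    def read(self, size=-1):
        buf = self._fileobj.read(size)
        if buf:
            self._handler_fn(buf)
        return buf


def _example_digest_while_streaming():
    # Compute a digest as a side effect of consuming the stream, mirroring how the sha_state
    # handler is attached in upload_chunk below. Names here are illustrative only.
    import hashlib
    import io

    hasher = hashlib.sha256()
    stream = _HandlerWrappedStream(io.BytesIO(b"layer bytes"), hasher.update)
    while stream.read(65536):
        pass
    return hasher.hexdigest()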
def upload_chunk(self, app_config, input_fp, start_offset=0, length=-1):
    """
    Uploads a chunk of data found in the given input file-like interface.

    start_offset and length are optional and should match a range header if any was given.

    Returns the total number of bytes uploaded after this upload has completed. Raises a
    BlobUploadException if the upload failed.
    """
    assert start_offset is not None
    assert length is not None

    if start_offset > 0 and start_offset > self.blob_upload.byte_count:
        logger.error("start_offset provided greater than blob_upload.byte_count")
        raise BlobRangeMismatchException()

    # Ensure that we won't go over the allowed maximum size for blobs.
    max_blob_size = bitmath.parse_string_unsafe(self.settings.maximum_blob_size)
    uploaded = bitmath.Byte(length + start_offset)
    if length > -1 and uploaded > max_blob_size:
        raise BlobTooLargeException(uploaded=uploaded.bytes, max_allowed=max_blob_size.bytes)

    location_set = {self.blob_upload.location_name}
    upload_error = None
    with CloseForLongOperation(app_config):
        if start_offset > 0 and start_offset < self.blob_upload.byte_count:
            # Skip the bytes which were received on a previous push, which are already stored and
            # included in the sha calculation.
            overlap_size = self.blob_upload.byte_count - start_offset
            input_fp = StreamSlice(input_fp, overlap_size)

            # Update our upload bounds to reflect the skipped portion of the overlap.
            start_offset = self.blob_upload.byte_count
            length = max(length - overlap_size, 0)

        # We use this to escape early in case we have already processed all of the bytes the user
        # wants to upload.
        if length == 0:
            return self.blob_upload.byte_count

        input_fp = wrap_with_handler(input_fp, self.blob_upload.sha_state.update)

        if self.extra_blob_stream_handlers:
            for handler in self.extra_blob_stream_handlers:
                input_fp = wrap_with_handler(input_fp, handler)

        # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip
        # the stream so we can determine the uncompressed size. We'll throw out this data if
        # another chunk comes in, but in the common case the docker client only sends one chunk.
        size_info = None
        if start_offset == 0 and self.blob_upload.chunk_count == 0:
            size_info, fn = calculate_size_handler()
            input_fp = wrap_with_handler(input_fp, fn)

        start_time = time.time()
        length_written, new_metadata, upload_error = self.storage.stream_upload_chunk(
            location_set,
            self.blob_upload.upload_id,
            start_offset,
            length,
            input_fp,
            self.blob_upload.storage_metadata,
            content_type=BLOB_CONTENT_TYPE,
        )

        if upload_error is not None:
            logger.error("storage.stream_upload_chunk returned error %s", upload_error)
            raise BlobUploadException(upload_error)

        # Update the chunk upload time and push bytes metrics.
        chunk_upload_duration.labels(list(location_set)[0]).observe(time.time() - start_time)
        pushed_bytes_total.inc(length_written)

    # Ensure we have not gone beyond the max layer size.
    new_blob_bytes = self.blob_upload.byte_count + length_written
    new_blob_size = bitmath.Byte(new_blob_bytes)
    if new_blob_size > max_blob_size:
        raise BlobTooLargeException(uploaded=new_blob_size, max_allowed=max_blob_size.bytes)

    # If we determined an uncompressed size and this is the first chunk, add it to the blob.
    # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
    uncompressed_byte_count = self.blob_upload.uncompressed_byte_count
    if size_info is not None and self.blob_upload.chunk_count == 0 and size_info.is_valid:
        uncompressed_byte_count = size_info.uncompressed_size
    elif length_written > 0:
        # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't
        # know the uncompressed size.
        uncompressed_byte_count = None

    self.blob_upload = registry_model.update_blob_upload(
        self.blob_upload,
        uncompressed_byte_count,
        new_metadata,
        new_blob_bytes,
        self.blob_upload.chunk_count + 1,
        self.blob_upload.sha_state,
    )
    if self.blob_upload is None:
        raise BlobUploadException("Could not complete upload of chunk")

    return new_blob_bytes
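# --- Illustrative sketch (assumption, not part of the blob upload manager) ---------------------
# The overlap handling in upload_chunk above reduces to a small piece of arithmetic: when a
# client re-sends bytes that were already stored (start_offset < byte_count), the first
# byte_count - start_offset bytes of the incoming chunk are skipped and the effective offset and
# length are adjusted before the data is streamed to storage. A standalone sketch of that
# bookkeeping, using hypothetical names:


def _example_chunk_bounds(byte_count, start_offset, length):
    """Return (effective_offset, effective_length, skipped_bytes) for an incoming chunk."""
    skipped = 0
    if start_offset > 0 and start_offset < byte_count:
        # Bytes in [start_offset, byte_count) were already stored and hashed on a prior push.
        skipped = byte_count - start_offset
        start_offset = byte_count
        length = max(length - skipped, 0)
    return start_offset, length, skipped


# For example, if 1000 bytes are already stored and the client resumes with a range starting at
# offset 900 for 300 bytes, only the last 200 bytes are new:
#   _example_chunk_bounds(1000, 900, 300) == (1000, 200, 100)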