def create(self, request):
    """
    Create GemContent from an artifact.
    """
    data = request.data
    try:
        artifact = self.get_resource(data.pop('artifact'), Artifact)
    except KeyError:
        raise serializers.ValidationError(
            detail={'artifact': _('This field is required')})

    name, version, spec_data = analyse_gem(artifact.file.name)
    data['name'] = name
    data['version'] = version

    serializer = self.get_serializer(data=data)
    serializer.is_valid(raise_exception=True)
    content = serializer.save()

    relative_path = os.path.join('gems', name + '-' + version + '.gem')
    spec_relative_path = os.path.join('quick/Marshal.4.8',
                                      name + '-' + version + '.gemspec.rz')
    ContentArtifact(artifact=artifact,
                    content=content,
                    relative_path=relative_path).save()
    ContentArtifact(artifact=_artifact_from_data(spec_data),
                    content=content,
                    relative_path=spec_relative_path).save()

    headers = self.get_success_headers(request.data)
    return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
def artifact(self, artifact):
    """
    Set the artifact for this Ansible Role version.
    """
    if self.pk:
        ca = ContentArtifact(artifact=artifact,
                             content=self,
                             relative_path="{}/{}/{}.tar.gz".format(
                                 self.role.namespace,
                                 self.role.name,
                                 self.version))
        ca.save()
def put(self, request, path, pk=None):
    """
    Responds with the actual manifest
    """
    _, repository = self.get_dr_push(request, path)
    # iterate over all the layers and create
    chunk = request.META["wsgi.input"]
    artifact = self.receive_artifact(chunk)
    with storage.open(artifact.file.name) as artifact_file:
        raw_data = artifact_file.read()
    content_data = json.loads(raw_data)
    config_layer = content_data.get("config")
    config_blob = models.Blob.objects.get(digest=config_layer.get("digest"))

    manifest = models.Manifest(
        digest="sha256:{id}".format(id=artifact.sha256),
        schema_version=2,
        media_type=request.content_type,
        config_blob=config_blob,
    )
    try:
        manifest.save()
    except IntegrityError:
        manifest = models.Manifest.objects.get(digest=manifest.digest)
    ca = ContentArtifact(artifact=artifact, content=manifest,
                         relative_path=manifest.digest)
    try:
        ca.save()
    except IntegrityError:
        pass

    layers = content_data.get("layers")
    blobs = []
    for layer in layers:
        blobs.append(layer.get("digest"))
    blobs_qs = models.Blob.objects.filter(digest__in=blobs)
    thru = []
    for blob in blobs_qs:
        thru.append(models.BlobManifest(manifest=manifest, manifest_blob=blob))
    models.BlobManifest.objects.bulk_create(objs=thru, ignore_conflicts=True,
                                            batch_size=1000)

    tag = models.Tag(name=pk, tagged_manifest=manifest)
    try:
        tag.save()
    except IntegrityError:
        pass

    with repository.new_version() as new_version:
        new_version.add_content(models.Manifest.objects.filter(digest=manifest.digest))
        new_version.remove_content(models.Tag.objects.filter(name=tag.name))
        new_version.add_content(
            models.Tag.objects.filter(name=tag.name, tagged_manifest=manifest))
    return ManifestResponse(manifest, path, request, status=201)
def artifact(self, artifact):
    """
    Set the artifact for this FileContent.
    """
    if self.pk:
        ca = ContentArtifact(artifact=artifact,
                             content=self,
                             relative_path=self.relative_path)
        ca.save()
def artifact(self, artifact):
    """
    Set the artifact for this Ansible Role version.
    """
    if self.pk:
        ca = ContentArtifact(
            artifact=artifact,
            content=self,
            relative_path="{namespace}/{name}/{version}.tar.gz".format(
                namespace=self.role.namespace,
                name=self.role.name,
                version=self.version))
        ca.save()
def next_group(self, additions):
    """
    Generator of ExampleContent, ContentArtifacts, and RemoteArtifacts.

    This generator is responsible for creating all the models needed to create
    ExampleContent in Pulp. The ExampleContent object is stored in a dictionary
    so it can be referenced after downloads complete.

    This generator emits a :class:`pulpcore.plugin.download.asyncio.group.Group`.
    """
    parsed_url = urlparse(self.feed_url)
    root_dir = os.path.dirname(parsed_url.path)
    for entry in self.read_manifest():
        key = Key(path=entry['path'], digest=entry['digest'])
        if key in additions:
            path = os.path.join(root_dir, entry['path'])
            url = urlunparse(parsed_url._replace(path=path))
            example_content = ExampleContent(path=entry['path'], digest=entry['digest'])
            content_id = tuple(getattr(example_content, f)
                               for f in example_content.natural_key_fields())
            self.content_dict[content_id] = example_content
            # The content is set on the content_artifact right before writing to the
            # database. This helps deal with race conditions when saving Content.
            content_artifact = ContentArtifact(relative_path=entry['path'])
            remote_artifacts = [RemoteArtifact(url=url, importer=self,
                                               sha256=entry['digest'],
                                               size=entry['size'],
                                               content_artifact=content_artifact)]
            yield Group(content_id, remote_artifacts)
def next_remote_artifact(self, additions):
    """
    Generator of ExampleContent, ContentArtifacts, and RemoteArtifacts.

    This generator is responsible for creating all the models needed to create
    ExampleContent in Pulp. It stores the ExampleContent in a dictionary to be
    used in the deferred_sync method.

    This generator emits a RemoteArtifact object.

    Args:
        additions (set of namedtuple Key): Set of Keys corresponding to
            ExampleContent that should be created.

    Yields:
        RemoteArtifact that is needed for the ExampleContent.
    """
    parsed_url = urlparse(self.feed_url)
    root_dir = os.path.dirname(parsed_url.path)
    for entry in self.read_manifest():
        key = Key(path=entry['path'], digest=entry['digest'])
        if key in additions:
            path = os.path.join(root_dir, entry['path'])
            url = urlunparse(parsed_url._replace(path=path))
            example_content = ExampleContent(path=entry['path'], digest=entry['digest'])
            self.content_dict[url] = example_content
            # The content is set on the content_artifact right before writing to the
            # database. This helps deal with race conditions when saving Content.
            content_artifact = ContentArtifact(relative_path=entry['path'])
            remote_artifact = RemoteArtifact(url=url, importer=self,
                                             sha256=entry['digest'],
                                             size=entry['size'],
                                             content_artifact=content_artifact)
            yield remote_artifact
async def run(self):
    """
    The coroutine for this stage.

    Returns:
        The coroutine for this stage.
    """
    async for batch in self.batches():
        content_artifact_bulk = []
        with transaction.atomic():
            await self._pre_save(batch)
            for d_content in batch:
                if d_content.content.pk is None:
                    try:
                        with transaction.atomic():
                            d_content.content.save()
                    except IntegrityError:
                        d_content.content = \
                            d_content.content.__class__.objects.get(
                                d_content.content.q())
                        continue
                    for d_artifact in d_content.d_artifacts:
                        content_artifact = ContentArtifact(
                            content=d_content.content,
                            artifact=d_artifact.artifact,
                            relative_path=d_artifact.relative_path)
                        content_artifact_bulk.append(content_artifact)
            ContentArtifact.objects.bulk_get_or_create(content_artifact_bulk)
            await self._post_save(batch)
        for declarative_content in batch:
            await self.put(declarative_content)
def get_or_create_blob(layer_json, manifest, path):
    """
    Create a Blob from a JSON snippet of manifest.json.

    Args:
        layer_json (dict): The layer JSON snippet from manifest.json
        manifest (:class:`pulp_container.app.models.Manifest`): The manifest
        path (str): Path of the directory that contains the layer

    Returns:
        :class:`pulp_container.app.models.Blob`
    """
    try:
        blob = Blob.objects.get(digest=layer_json["digest"])
    except Blob.DoesNotExist:
        layer_file_name = "{}{}".format(path, layer_json["digest"][7:])
        layer_artifact = Artifact.init_and_validate(layer_file_name)
        layer_artifact.save()
        blob = Blob(digest=layer_json["digest"], media_type=layer_json["mediaType"])
        blob.save()
        ContentArtifact(artifact=layer_artifact, content=blob,
                        relative_path=layer_json["digest"]).save()
    if blob.media_type != MEDIA_TYPE.CONFIG_BLOB_OCI:
        BlobManifest(manifest=manifest, manifest_blob=blob).save()
    return blob
def put(self, request, path, pk=None):
    """
    Create a blob from uploaded chunks.
    """
    _, repository = self.get_dr_push(request, path)
    digest = request.query_params["digest"]
    upload = models.Upload.objects.get(pk=pk, repository=repository)
    chunks = UploadChunk.objects.filter(upload=upload).order_by("offset")

    with NamedTemporaryFile("ab") as temp_file:
        for chunk in chunks:
            temp_file.write(chunk.file.read())
        temp_file.flush()

        uploaded_file = PulpTemporaryUploadedFile.from_file(
            File(open(temp_file.name, "rb")))

    if uploaded_file.hashers["sha256"].hexdigest() == digest[len("sha256:"):]:
        try:
            artifact = Artifact.init_and_validate(uploaded_file)
            artifact.save()
        except IntegrityError:
            artifact = Artifact.objects.get(sha256=artifact.sha256)
        try:
            blob = models.Blob(digest=digest, media_type=models.MEDIA_TYPE.REGULAR_BLOB)
            blob.save()
        except IntegrityError:
            blob = models.Blob.objects.get(digest=digest)
        try:
            blob_artifact = ContentArtifact(artifact=artifact, content=blob,
                                            relative_path=digest)
            blob_artifact.save()
        except IntegrityError:
            pass
        with repository.new_version() as new_version:
            new_version.add_content(models.Blob.objects.filter(pk=blob.pk))
        upload.delete()
        return BlobResponse(blob, path, 201, request)
    else:
        raise Exception("The digest did not match")
def _save_manifest(self, artifact, manifest_digest, content_type, config_blob=None):
    manifest = models.Manifest(
        digest=manifest_digest,
        schema_version=2,
        media_type=content_type,
        config_blob=config_blob,
    )
    try:
        manifest.save()
    except IntegrityError:
        manifest = models.Manifest.objects.get(digest=manifest.digest)
        manifest.touch()
    ca = ContentArtifact(artifact=artifact, content=manifest,
                         relative_path=manifest.digest)
    try:
        ca.save()
    except IntegrityError:
        ca = ContentArtifact.objects.get(content=manifest,
                                         relative_path=manifest.digest)
        if not ca.artifact:
            ca.artifact = artifact
            ca.save(update_fields=["artifact"])
    return manifest
def add_image_from_directory_to_repository(path, repository, tag):
    """
    Creates a Manifest and all blobs from a directory with OCI image

    Args:
        path (str): Path to directory with the OCI image
        repository (:class:`pulpcore.plugin.models.Repository`): The destination repository
        tag (str): Tag name for the new image in the repository

    Returns:
        A :class:`pulpcore.plugin.models.RepositoryVersion` that contains the new OCI
        container image and tag.
    """
    manifest_path = "{}manifest.json".format(path)
    manifest_artifact = Artifact.init_and_validate(manifest_path)
    manifest_artifact.save()
    manifest_digest = "sha256:{}".format(manifest_artifact.sha256)
    manifest = Manifest(digest=manifest_digest, schema_version=2,
                        media_type=MEDIA_TYPE.MANIFEST_OCI)
    manifest.save()
    ContentArtifact(artifact=manifest_artifact, content=manifest,
                    relative_path=manifest_digest).save()

    tag = Tag(name=tag, tagged_manifest=manifest)
    tag.save()
    ContentArtifact(artifact=manifest_artifact, content=tag,
                    relative_path=tag.name).save()

    with repository.new_version() as new_repo_version:
        new_repo_version.add_content(Manifest.objects.filter(pk=manifest.pk))
        new_repo_version.add_content(Tag.objects.filter(pk=tag.pk))

        with open(manifest_artifact.file.path, "r") as manifest_file:
            manifest_json = json.load(manifest_file)

        config_blob = get_or_create_blob(manifest_json["config"], manifest, path)
        manifest.config_blob = config_blob
        manifest.save()
        new_repo_version.add_content(Blob.objects.filter(pk=config_blob.pk))

        for layer in manifest_json["layers"]:
            blob = get_or_create_blob(layer, manifest, path)
            new_repo_version.add_content(Blob.objects.filter(pk=blob.pk))
    return new_repo_version
def put(self, request, path, pk=None):
    """Handles creation of Uploads."""
    _, repository = self.get_dr_push(request, path)
    digest = request.query_params["digest"]
    upload = models.Upload.objects.get(pk=pk, repository=repository)

    if upload.sha256 == digest[len("sha256:"):]:
        try:
            artifact = Artifact(
                file=upload.file.name,
                md5=upload.md5,
                sha1=upload.sha1,
                sha256=upload.sha256,
                sha384=upload.sha384,
                sha512=upload.sha512,
                size=upload.file.size,
            )
            artifact.save()
        except IntegrityError:
            artifact = Artifact.objects.get(sha256=artifact.sha256)
        try:
            blob = models.Blob(digest=digest, media_type=models.MEDIA_TYPE.REGULAR_BLOB)
            blob.save()
        except IntegrityError:
            blob = models.Blob.objects.get(digest=digest)
        try:
            blob_artifact = ContentArtifact(artifact=artifact, content=blob,
                                            relative_path=digest)
            blob_artifact.save()
        except IntegrityError:
            pass
        with repository.new_version() as new_version:
            new_version.add_content(models.Blob.objects.filter(pk=blob.pk))
        upload.delete()
        return BlobResponse(blob, path, 201, request)
    else:
        raise Exception("The digest did not match")
def create_content_artifacts(self, dc):
    """
    Create ContentArtifacts to associate saved Content to saved Artifacts.

    Args:
        dc (:class:`~pulpcore.plugin.stages.DeclarativeContent`): Object containing
            Content and Artifacts to relate.
    """
    for da in dc.d_artifacts:
        content_artifact = ContentArtifact(
            content=dc.content,
            artifact=da.artifact,
            relative_path=da.relative_path
        )
        try:
            content_artifact.save()
        except IntegrityError:
            content_artifact = ContentArtifact.objects.get(
                content=dc.content,
                artifact=da.artifact,
                relative_path=da.relative_path
            )
        remote_artifact_data = {
            'url': da.url,
            'size': da.artifact.size,
            'md5': da.artifact.md5,
            'sha1': da.artifact.sha1,
            'sha224': da.artifact.sha224,
            'sha256': da.artifact.sha256,
            'sha384': da.artifact.sha384,
            'sha512': da.artifact.sha512,
            'remote': da.remote,
        }
        new_remote_artifact = RemoteArtifact(
            content_artifact=content_artifact, **remote_artifact_data
        )
        try:
            new_remote_artifact.save()
        except IntegrityError:
            pass
def create(self, validated_data):
    """
    Create a Package.

    Overriding default create() to deal with artifact properly.

    Args:
        validated_data (dict): Data used to create the Package

    Returns:
        models.Package: The created Package
    """
    artifact = validated_data.pop('artifact')

    package = Package.objects.create(**validated_data)
    ca = ContentArtifact(artifact=artifact, content=package,
                         relative_path=package.filename)
    ca.save()
    return package
def save(self):
    """
    Update the DB:
     - Create (or fetch) the Artifact.
     - Create (or fetch) the ContentArtifact.
     - Create (or update) the RemoteArtifact.
    """
    if self._stored_model:
        try:
            with transaction.atomic():
                self._stored_model.save()
        except IntegrityError:
            q = self.artifact_q()
            self._stored_model = Artifact.objects.get(q)

    try:
        with transaction.atomic():
            content_artifact = ContentArtifact(
                relative_path=self.relative_path,
                content=self.content.stored_model,
                artifact=self._stored_model)
            content_artifact.save()
    except IntegrityError:
        content_artifact = ContentArtifact.objects.get(
            relative_path=self.relative_path,
            content=self.content.stored_model)
        if self._stored_model:
            content_artifact.artifact = self._stored_model
            content_artifact.save()

    digests = {f: getattr(self._model, f) for f in Artifact.DIGEST_FIELDS}

    try:
        with transaction.atomic():
            remote_artifact = RemoteArtifact(
                url=self.url,
                remote=self.remote,
                content_artifact=content_artifact,
                size=self._model.size,
                **digests)
            remote_artifact.save()
    except IntegrityError:
        q_set = RemoteArtifact.objects.filter(
            remote=self.remote,
            content_artifact=content_artifact)
        q_set.update(
            url=self.url,
            size=self._model.size,
            **digests)
def setUp(self):
    """Setup database fixtures."""
    self.package1 = Package(
        package_name='aegir',
        version='0.1-edda0',
        architecture='sea',
        maintainer='Utgardloki',
        description='A sea jötunn associated with the ocean.',
    )
    self.package1.save()
    self.artifact1 = Artifact(
        size=42,
        md5='aabb',
        sha1='ccdd',
        sha256='eeff',
        file=SimpleUploadedFile('test_filename', b'test content'),
    )
    self.artifact1.save()
    ContentArtifact(artifact=self.artifact1, content=self.package1).save()
def setUp(self):
    """Setup database fixtures."""
    self.package1 = Package(
        package_name="aegir",
        version="0.1-edda0",
        architecture="sea",
        maintainer="Utgardloki",
        description="A sea jötunn associated with the ocean.",
    )
    self.package1.save()
    self.artifact1 = Artifact(
        size=42,
        md5="aabb",
        sha1="ccdd",
        sha256="eeff",
        file=SimpleUploadedFile("test_filename", b"test content"),
    )
    self.artifact1.save()
    ContentArtifact(artifact=self.artifact1, content=self.package1).save()
async def run(self):
    """
    The coroutine for this stage.

    Returns:
        The coroutine for this stage.
    """
    async for batch in self.batches():
        content_artifact_bulk = []
        with transaction.atomic():
            await self._pre_save(batch)
            for d_content in batch:
                # Are we saving to the database for the first time?
                content_already_saved = not d_content.content._state.adding
                if not content_already_saved:
                    try:
                        with transaction.atomic():
                            d_content.content.save()
                    except IntegrityError as e:
                        try:
                            d_content.content = d_content.content.__class__.objects.get(
                                d_content.content.q())
                        except ObjectDoesNotExist:
                            raise e
                        continue
                    for d_artifact in d_content.d_artifacts:
                        if not d_artifact.artifact._state.adding:
                            artifact = d_artifact.artifact
                        else:
                            # set to None for on-demand synced artifacts
                            artifact = None
                        content_artifact = ContentArtifact(
                            content=d_content.content,
                            artifact=artifact,
                            relative_path=d_artifact.relative_path,
                        )
                        content_artifact_bulk.append(content_artifact)
            ContentArtifact.objects.bulk_get_or_create(content_artifact_bulk)
            await self._post_save(batch)
        for declarative_content in batch:
            await self.put(declarative_content)
async def __call__(self, in_q, out_q):
    """
    The coroutine for this stage.

    Args:
        in_q (:class:`asyncio.Queue`): The queue to receive
            :class:`~pulpcore.plugin.stages.DeclarativeContent` objects from.
        out_q (:class:`asyncio.Queue`): The queue to put
            :class:`~pulpcore.plugin.stages.DeclarativeContent` into.

    Returns:
        The coroutine for this stage.
    """
    batch = []
    shutdown = False
    while True:
        try:
            declarative_content = in_q.get_nowait()
        except asyncio.QueueEmpty:
            if not batch and not shutdown:
                declarative_content = await in_q.get()
                batch.append(declarative_content)
                continue
        else:
            batch.append(declarative_content)
            continue

        content_artifact_bulk = []
        remote_artifact_bulk = []
        remote_artifact_map = {}

        with transaction.atomic():
            for declarative_content in batch:
                if declarative_content is None:
                    shutdown = True
                    continue
                if declarative_content.content.pk is None:
                    declarative_content.content.save()
                for declarative_artifact in declarative_content.d_artifacts:
                    content_artifact = ContentArtifact(
                        content=declarative_content.content,
                        artifact=declarative_artifact.artifact,
                        relative_path=declarative_artifact.relative_path)
                    content_artifact_bulk.append(content_artifact)
                    remote_artifact_data = {
                        'url': declarative_artifact.url,
                        'size': declarative_artifact.artifact.size,
                        'md5': declarative_artifact.artifact.md5,
                        'sha1': declarative_artifact.artifact.sha1,
                        'sha224': declarative_artifact.artifact.sha224,
                        'sha256': declarative_artifact.artifact.sha256,
                        'sha384': declarative_artifact.artifact.sha384,
                        'sha512': declarative_artifact.artifact.sha512,
                        'remote': declarative_artifact.remote,
                    }
                    rel_path = declarative_artifact.relative_path
                    content_key = str(content_artifact.content.pk) + rel_path
                    remote_artifact_map[content_key] = remote_artifact_data

            for content_artifact in ContentArtifact.objects.bulk_create(
                    content_artifact_bulk):
                rel_path = content_artifact.relative_path
                content_key = str(content_artifact.content.pk) + rel_path
                remote_artifact_data = remote_artifact_map.pop(content_key)
                new_remote_artifact = RemoteArtifact(
                    content_artifact=content_artifact, **remote_artifact_data)
                remote_artifact_bulk.append(new_remote_artifact)
            RemoteArtifact.objects.bulk_create(remote_artifact_bulk)

        for declarative_content in batch:
            if declarative_content is None:
                continue
            await out_q.put(declarative_content)
        if shutdown:
            break
        batch = []
    await out_q.put(None)
def put(self, request, path, pk=None):
    """
    Responds with the actual manifest
    """
    _, repository = self.get_dr_push(request, path)
    # iterate over all the layers and create
    chunk = request.META["wsgi.input"]
    artifact = self.receive_artifact(chunk)
    with storage.open(artifact.file.name) as artifact_file:
        raw_data = artifact_file.read()
    content_data = json.loads(raw_data)
    config_layer = content_data.get("config")
    config_blob = models.Blob.objects.get(digest=config_layer.get("digest"))

    manifest = models.Manifest(
        digest="sha256:{id}".format(id=artifact.sha256),
        schema_version=2,
        media_type=request.content_type,
        config_blob=config_blob,
    )
    try:
        manifest.save()
    except IntegrityError:
        manifest = models.Manifest.objects.get(digest=manifest.digest)
    ca = ContentArtifact(artifact=artifact, content=manifest,
                         relative_path=manifest.digest)
    try:
        ca.save()
    except IntegrityError:
        pass

    layers = content_data.get("layers")
    blobs = []
    for layer in layers:
        blobs.append(layer.get("digest"))
    blobs_qs = models.Blob.objects.filter(digest__in=blobs)
    thru = []
    for blob in blobs_qs:
        thru.append(models.BlobManifest(manifest=manifest, manifest_blob=blob))
    models.BlobManifest.objects.bulk_create(objs=thru, ignore_conflicts=True,
                                            batch_size=1000)

    tag = models.Tag(name=pk, tagged_manifest=manifest)
    try:
        tag.save()
    except IntegrityError:
        tag = models.Tag.objects.get(name=tag.name, tagged_manifest=manifest)

    # Remove existing tags with the same name that point at a different manifest.
    tags_to_remove = models.Tag.objects.filter(
        pk__in=repository.latest_version().content.all(), name=tag.name
    ).exclude(tagged_manifest=manifest)
    dispatched_task = dispatch(
        add_and_remove,
        [repository],
        kwargs={
            "repository_pk": str(repository.pk),
            "add_content_units": [str(tag.pk), str(manifest.pk)],
            "remove_content_units": [
                str(pk) for pk in tags_to_remove.values_list("pk", flat=True)
            ],
        },
    )

    # Wait a small amount of time
    for dummy in range(3):
        time.sleep(1)
        task = Task.objects.get(pk=dispatched_task.pk)
        if task.state == "completed":
            task.delete()
            return ManifestResponse(manifest, path, request, status=201)
        elif task.state in ["waiting", "running"]:
            continue
        else:
            error = task.error
            task.delete()
            raise Exception(str(error))
    raise Throttled()
def put(self, request, path, pk=None):
    """
    Create a blob from uploaded chunks.
    """
    _, repository = self.get_dr_push(request, path)
    digest = request.query_params["digest"]

    # Try to see if the client came back after we told it to backoff with the ``Throttled``
    # exception. In that case we answer based on the task state, or make it backoff again.
    # This mechanism seems to work with podman but not with docker. However we let the task
    # run anyway, since all clients will look with a HEAD request before attempting to
    # upload a blob again.
    try:
        upload = models.Upload.objects.get(pk=pk, repository=repository)
    except models.Upload.DoesNotExist as e_upload:
        # Upload has been deleted => task has started or even finished
        task = Task.objects.filter(
            name__endswith="add_and_remove",
            reserved_resources_record__resource=f"upload:{pk}",
        ).last()
        if task is None:
            # No upload and no task for it => the upload probably never existed
            # return 404
            raise e_upload

        if task.state == "completed":
            task.delete()
            blob = models.Blob.objects.get(digest=digest)
            return BlobResponse(blob, path, 201, request)
        elif task.state in ["waiting", "running"]:
            raise Throttled()
        else:
            error = task.error
            task.delete()
            raise Exception(str(error))

    chunks = UploadChunk.objects.filter(upload=upload).order_by("offset")

    with NamedTemporaryFile("ab") as temp_file:
        for chunk in chunks:
            temp_file.write(chunk.file.read())
        temp_file.flush()

        uploaded_file = PulpTemporaryUploadedFile.from_file(
            File(open(temp_file.name, "rb")))

    if uploaded_file.hashers["sha256"].hexdigest() == digest[len("sha256:"):]:
        try:
            artifact = Artifact.init_and_validate(uploaded_file)
            artifact.save()
        except IntegrityError:
            artifact = Artifact.objects.get(sha256=artifact.sha256)
        try:
            blob = models.Blob(digest=digest, media_type=models.MEDIA_TYPE.REGULAR_BLOB)
            blob.save()
        except IntegrityError:
            blob = models.Blob.objects.get(digest=digest)
        try:
            blob_artifact = ContentArtifact(
                artifact=artifact, content=blob, relative_path=digest
            )
            blob_artifact.save()
        except IntegrityError:
            pass

        upload.delete()

        dispatched_task = dispatch(
            add_and_remove,
            [f"upload:{pk}", repository],
            kwargs={
                "repository_pk": str(repository.pk),
                "add_content_units": [str(blob.pk)],
                "remove_content_units": [],
            },
        )

        # Wait a small amount of time
        for dummy in range(3):
            time.sleep(1)
            task = Task.objects.get(pk=dispatched_task.pk)
            if task.state == "completed":
                task.delete()
                return BlobResponse(blob, path, 201, request)
            elif task.state in ["waiting", "running"]:
                continue
            else:
                error = task.error
                task.delete()
                raise Exception(str(error))
        raise Throttled()
    else:
        raise Exception("The digest did not match")
def artifact(self, artifact):
    if self.pk:
        ca = ContentArtifact(artifact=artifact,
                             content=self,
                             relative_path=self.relative_path)
        ca.save()
def process_batch():
    content_artifact_bulk = []
    to_update_ca_query = ContentArtifact.objects.none()
    to_update_ca_bulk = []
    to_update_ca_artifact = {}
    with transaction.atomic():
        self._pre_save(batch)

        # Process the batch in dc.content.natural_keys order.
        # This prevents deadlocks when we're processing the same/similar content
        # in concurrent workers.
        batch.sort(key=lambda x: "".join(map(str, x.content.natural_key())))
        for d_content in batch:
            # Are we saving to the database for the first time?
            content_already_saved = not d_content.content._state.adding
            if not content_already_saved:
                try:
                    with transaction.atomic():
                        d_content.content.save()
                except IntegrityError as e:
                    try:
                        d_content.content = d_content.content.__class__.objects.get(
                            d_content.content.q())
                    except ObjectDoesNotExist:
                        raise e
                else:
                    for d_artifact in d_content.d_artifacts:
                        if not d_artifact.artifact._state.adding:
                            artifact = d_artifact.artifact
                        else:
                            # set to None for on-demand synced artifacts
                            artifact = None
                        content_artifact = ContentArtifact(
                            content=d_content.content,
                            artifact=artifact,
                            relative_path=d_artifact.relative_path,
                        )
                        content_artifact_bulk.append(content_artifact)
                    continue
            # When the Content already exists, check if ContentArtifacts need to be
            # updated
            for d_artifact in d_content.d_artifacts:
                if not d_artifact.artifact._state.adding:
                    # the artifact is already present in the database; update references
                    # Creating one large query and one large dictionary
                    to_update_ca_query |= ContentArtifact.objects.filter(
                        content=d_content.content,
                        relative_path=d_artifact.relative_path,
                    )
                    key = (d_content.content.pk, d_artifact.relative_path)
                    to_update_ca_artifact[key] = d_artifact.artifact
        # Query db once and update each object in memory for bulk_update call
        for content_artifact in to_update_ca_query.iterator():
            key = (content_artifact.content_id, content_artifact.relative_path)
            # Maybe remove dict elements after to reduce memory?
            content_artifact.artifact = to_update_ca_artifact[key]
            to_update_ca_bulk.append(content_artifact)

        # Sort the lists we're about to do bulk updates/creates on.
        # We know to_update_ca_bulk entries already are in the DB, so we can enforce
        # order just using pulp_id.
        to_update_ca_bulk.sort(key=lambda x: x.pulp_id)
        content_artifact_bulk.sort(key=lambda x: ContentArtifact.sort_key(x))

        ContentArtifact.objects.bulk_update(to_update_ca_bulk, ["artifact"])
        ContentArtifact.objects.bulk_get_or_create(content_artifact_bulk)

        self._post_save(batch)
async def __call__(self, in_q, out_q):
    """
    The coroutine for this stage.

    Args:
        in_q (:class:`asyncio.Queue`): The queue to receive
            :class:`~pulpcore.plugin.stages.DeclarativeContent` objects from.
        out_q (:class:`asyncio.Queue`): The queue to put
            :class:`~pulpcore.plugin.stages.DeclarativeContent` into.

    Returns:
        The coroutine for this stage.
    """
    async for batch in self.batches(in_q):
        content_artifact_bulk = []
        remote_artifact_bulk = []
        remote_artifact_map = {}

        with transaction.atomic():
            await self._pre_save(batch)
            for declarative_content in batch:
                if declarative_content.content.pk is None:
                    declarative_content.content.save()
                for declarative_artifact in declarative_content.d_artifacts:
                    content_artifact = ContentArtifact(
                        content=declarative_content.content,
                        artifact=declarative_artifact.artifact,
                        relative_path=declarative_artifact.relative_path)
                    content_artifact_bulk.append(content_artifact)
                    remote_artifact_data = {
                        'url': declarative_artifact.url,
                        'size': declarative_artifact.artifact.size,
                        'md5': declarative_artifact.artifact.md5,
                        'sha1': declarative_artifact.artifact.sha1,
                        'sha224': declarative_artifact.artifact.sha224,
                        'sha256': declarative_artifact.artifact.sha256,
                        'sha384': declarative_artifact.artifact.sha384,
                        'sha512': declarative_artifact.artifact.sha512,
                        'remote': declarative_artifact.remote,
                    }
                    rel_path = declarative_artifact.relative_path
                    content_key = str(content_artifact.content.pk) + rel_path
                    remote_artifact_map[content_key] = remote_artifact_data

            for content_artifact in ContentArtifact.objects.bulk_create(
                    content_artifact_bulk):
                rel_path = content_artifact.relative_path
                content_key = str(content_artifact.content.pk) + rel_path
                remote_artifact_data = remote_artifact_map.pop(content_key)
                new_remote_artifact = RemoteArtifact(
                    content_artifact=content_artifact, **remote_artifact_data)
                remote_artifact_bulk.append(new_remote_artifact)
            RemoteArtifact.objects.bulk_create(remote_artifact_bulk)
            await self._post_save(batch)
        for declarative_content in batch:
            await out_q.put(declarative_content)
    await out_q.put(None)