Exemplo n.º 1
0
    def content_batch_qs(self,
                         content_qs=None,
                         order_by_params=("pk", ),
                         batch_size=1000):
        """
        Generate content batches to efficiently iterate over all content.

        Generates query sets that span the `content_qs` content of the repository
        version. Each yielded query set evaluates to at most `batch_size` content records.
        This is useful to limit the memory footprint when iterating over all content of
        a repository version.

        .. note::

            * This generator is not safe against changes (i.e. add/remove content) during
              the iteration!

            * As the method uses slices internally, the queryset must be ordered to yield
              stable results. By default, it is ordered by primary key.

        Args:
            content_qs (:class:`django.db.models.QuerySet`): The queryset for Content that will be
                restricted further to the content present in this repository version. If not given,
                ``Content.objects`` is used (to iterate over all content present in the
                repository version). A plugin may want to use a specific subclass of
                :class:`~pulpcore.plugin.models.Content` or use e.g. ``filter()`` to select
                a subset of the repository version's content.
            order_by_params (tuple of str): The parameters for the ``order_by`` clause
                for the content. The default is ``("pk",)``. This needs to
                specify a stable order. For example, if you want to iterate by
                decreasing creation time stamps use ``("-pulp_created", "pk")`` to
                ensure that content records are still sorted by primary key even
                if their creation timestamp happens to be equal.
            batch_size (int): The maximum batch size.

        Yields:
            :class:`django.db.models.QuerySet`: A QuerySet representing a slice of the content.

        Example:
            The following code could be used to loop over all ``FileContent`` in
            ``repository_version``. It prefetches the related
            :class:`~pulpcore.plugin.models.ContentArtifact` instances for every batch::

                repository_version = ...

                batch_generator = repository_version.content_batch_qs(
                    content_qs=FileContent.objects.all()
                )
                for content_batch_qs in batch_generator:
                    # QuerySets are immutable: prefetch_related() returns a new
                    # QuerySet, so the result has to be kept.
                    content_batch_qs = content_batch_qs.prefetch_related(
                        "contentartifact_set"
                    )
                    for content in content_batch_qs:
                        ...

        """
        if content_qs is None:
            content_qs = Content.objects
        # Restrict to the content of this version first, then impose the caller-supplied
        # order so that the slices taken by batch_qs are stable.
        version_content_qs = content_qs.filter(
            version_memberships__in=self._content_relationships()
        ).order_by(*order_by_params)
        yield from batch_qs(version_content_qs, batch_size=batch_size)
Exemplo n.º 2
0
    def add_content(self, content):
        """
        Add content units to this version.

        Content that is already part of this version is skipped, so no second
        ``RepositoryContent`` row is created for it.

        Args:
           content (django.db.models.QuerySet): Set of Content to add

        Raises:
            pulpcore.exception.ResourceImmutableError: if add_content is called on a
                complete RepositoryVersion
        """
        if self.complete:
            raise ResourceImmutableError(self)

        to_add = set(content.values_list('pk', flat=True))

        # The existing content is walked in batches. Batching a queryset by slices only
        # yields each row exactly once if the queryset has a deterministic order, so order
        # by primary key before handing it to batch_qs.
        existing_pks = self.content.order_by('pk').values_list('pk', flat=True)
        for existing in batch_qs(existing_pks):
            to_add.difference_update(existing)

        RepositoryContent.objects.bulk_create([
            RepositoryContent(repository=self.repository,
                              content_id=content_pk,
                              version_added=self)
            for content_pk in to_add
        ])
Exemplo n.º 3
0
def remove_duplicates(repository_version):
    """
    Inspect content additions in the `RepositoryVersion` and remove existing repository duplicates.

    This function will inspect the content being added to a repo version and remove any existing
    content which would collide with the content being added to the repository version. It does not
    inspect the content being added for duplicates.

    Some content can have two instances A and B which are unique, but cannot both exist together in
    one repository. For example, pulp_file's content has `relative_path` for that file within the
    repository.

    Any content newly added to the :class:`~pulpcore.plugin.models.RepositoryVersion` is checked
    against the content that already existed before (the content of the base version if one is
    set, otherwise of the previous version). Newer "repository duplicates" replace existing
    "repository duplicates". Each Content model can define a `repo_key_fields` attribute with the
    field names to be compared. If all `repo_key_fields` contain the same value for two content
    units, they are considered "repository duplicates". Content types whose `repo_key_fields` is
    the empty tuple are skipped.

    Args:
        repository_version: The :class:`~pulpcore.plugin.models.RepositoryVersion` to be checked
            and possibly modified.
    """
    added_content = repository_version.added(
        base_version=repository_version.base_version)
    if repository_version.base_version:
        existing_content = repository_version.base_version.content
    else:
        try:
            existing_content = repository_version.previous().content
        except repository_version.DoesNotExist:
            existing_content = Content.objects.none()

    # Nothing can collide when there is no pre-existing content. This does not depend on
    # the content type, so it is checked once instead of once per type.
    if not existing_content.exists():
        return

    repository = repository_version.repository.cast()
    content_types = {
        type_obj.get_pulp_type(): type_obj
        for type_obj in repository.CONTENT_TYPES
    }

    for pulp_type, type_obj in content_types.items():
        repo_key_fields = type_obj.repo_key_fields
        if repo_key_fields == ():
            # This type does not define repository uniqueness; nothing to compare.
            continue

        # Ordered by pk so that the batches below are taken from a deterministic sequence.
        new_content_qs = type_obj.objects.filter(
            pk__in=added_content.filter(pulp_type=pulp_type)
        ).order_by("pk").values(*repo_key_fields)

        if not new_content_qs.exists():
            continue

        # Translate the template first, then fill in the value; formatting before the
        # lookup would hand gettext a string that is not in any catalog.
        _logger.debug(
            _("Removing duplicates for type: {}").format(pulp_type))

        for batch in batch_qs(new_content_qs):
            find_dup_qs = Q()
            batch_is_empty = True

            for content_dict in batch:
                find_dup_qs |= Q(**content_dict)
                batch_is_empty = False

            if batch_is_empty:
                # An empty Q() does not restrict the filter at all and would flag every
                # existing unit of this type as a duplicate.
                continue

            duplicates_qs = (type_obj.objects.filter(
                pk__in=existing_content).filter(find_dup_qs).only("pk"))
            repository_version.remove_content(duplicates_qs)
Exemplo n.º 4
0
    def add_content(self, content):
        """
        Add content units to this version.

        Content that is already part of this version is skipped. Content that was removed
        earlier in this same version is restored by clearing ``version_removed`` on its
        existing ``RepositoryContent`` row instead of creating a new row.

        Args:
           content (django.db.models.QuerySet): Set of Content to add

        Raises:
            pulpcore.exception.ResourceImmutableError: if add_content is called on a
                complete RepositoryVersion
        """
        if self.complete:
            raise ResourceImmutableError(self)

        to_add = set(content.values_list('pk', flat=True))
        for existing in batch_qs(
                self.content.order_by('pk').values_list('pk', flat=True)):
            to_add.difference_update(existing)

        # Normalize representation if content has already been removed in this version and
        # is re-added: Undo removal by setting version_removed to None.
        #
        # The update below presumably changes what self.removed() returns. Doing it while
        # still paging through that queryset would shift the remaining slices and could
        # skip rows, so the matches are collected first and applied afterwards.
        readd_batches = []
        for removed in batch_qs(self.removed().order_by('pk').values_list(
                'pk', flat=True)):
            to_readd = to_add.intersection(removed)
            if to_readd:
                readd_batches.append(to_readd)

        for to_readd in readd_batches:
            RepositoryContent.objects.filter(
                content__in=to_readd,
                repository=self.repository,
                version_removed=self).update(version_removed=None)
            to_add -= to_readd

        RepositoryContent.objects.bulk_create([
            RepositoryContent(repository=self.repository,
                              content_id=content_pk,
                              version_added=self)
            for content_pk in to_add
        ])
Exemplo n.º 5
0
    def add_content(self, content):
        """
        Add content units to this version.

        Before anything is written, ``content`` is checked per content type: if two units of
        the same type share the same values for that type's ``repo_key_fields``, nothing is
        added and an error is raised. Content that is already part of this version is skipped.
        Content that was removed earlier in this same version is restored by clearing
        ``version_removed`` on its existing ``RepositoryContent`` row.

        Args:
           content (django.db.models.QuerySet): Set of Content to add

        Raises:
            pulpcore.exception.ResourceImmutableError: if add_content is called on a
                complete RepositoryVersion
            ValueError: if ``content`` holds more than one unit of a type with identical
                ``repo_key_fields`` values
        """
        if self.complete:
            raise ResourceImmutableError(self)

        # NOTE(review): CONTENT_TYPES is read from self.repository as-is. Confirm this is the
        # plugin-specific (cast) repository; otherwise the list may be empty and the check
        # below would be skipped silently.
        error_messages = []
        for type_obj in self.repository.CONTENT_TYPES:
            repo_key_fields = type_obj.repo_key_fields
            if repo_key_fields == ():
                continue

            pulp_type = type_obj.get_pulp_type()
            new_content_qs = type_obj.objects.filter(
                pk__in=content.filter(pulp_type=pulp_type)
            )
            new_content_total = new_content_qs.count()
            unique_new_content_total = new_content_qs.distinct(*repo_key_fields).count()

            # Fewer distinct key combinations than rows means at least two units collide.
            if unique_new_content_total < new_content_total:
                error_messages.append(_(
                    "More than one {pulp_type} content with the duplicate values for {fields}."
                    ).format(
                        pulp_type=pulp_type,
                        fields=", ".join(repo_key_fields),
                    )
                )
        if error_messages:
            raise ValueError(
                _("Cannot create repository version. {msg}").format(msg=", ".join(error_messages))
            )

        to_add = set(content.exclude(pk__in=self.content).values_list('pk', flat=True))

        # Normalize representation if content has already been removed in this version and
        # is re-added: Undo removal by setting version_removed to None.
        #
        # The update below presumably changes what self.removed() returns. Doing it while
        # still paging through that queryset would shift the remaining slices and could
        # skip rows, so the matches are collected first and applied afterwards.
        readd_batches = []
        for removed in batch_qs(self.removed().order_by('pk').values_list('pk', flat=True)):
            to_readd = to_add.intersection(removed)
            if to_readd:
                readd_batches.append(to_readd)

        for to_readd in readd_batches:
            RepositoryContent.objects.filter(
                content__in=to_readd,
                repository=self.repository,
                version_removed=self
            ).update(version_removed=None)
            to_add -= to_readd

        RepositoryContent.objects.bulk_create([
            RepositoryContent(
                repository=self.repository,
                content_id=content_pk,
                version_added=self
            )
            for content_pk in to_add
        ])