Example #1
    def _search_units(
        cls, repo, criteria_list, content_type_id, batch_size_override=None
    ):
        """
        Search for units of one content type associated with given repository by criteria.
        """
        units = set()
        batch_size = batch_size_override or BATCH_SIZE

        def handle_results(page):
            for unit in page.data:
                unit = UbiUnit(unit, repo.id)
                units.add(unit)
            if page.next:
                return f_flat_map(page.next, handle_results)
            return f_return(units)

        criteria_split = []

        for start in range(0, len(criteria_list), batch_size):
            criteria_split.append(criteria_list[start : start + batch_size])
        fts = []

        for criteria_batch in criteria_split:
            _criteria = Criteria.and_(
                Criteria.with_field("content_type_id", content_type_id),
                Criteria.or_(*criteria_batch),
            )

            page_f = repo.search_content(_criteria)
            handled_f = f_flat_map(page_f, handle_results)

            fts.append(handled_f)

        return f_flat_map(f_sequence(fts), flatten_list_of_sets)
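The f_* helpers used throughout these examples (f_flat_map, f_return, f_map, f_sequence, f_zip, f_proxy) appear to come from the more-executors library. A minimal, self-contained sketch of the pagination pattern above, with page_f standing in for any Future[Page] whose pages expose .data and .next:

from more_executors.futures import f_flat_map, f_return

def collect_all_units(page_f, units):
    # Resolve one page, accumulate its data, then chain into the future
    # for the next page (if any); the final future resolves once every
    # page has been visited.
    def handle_results(page):
        units.update(page.data)
        if page.next:
            return f_flat_map(page.next, handle_results)
        return f_return(units)

    return f_flat_map(page_f, handle_results)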
Example #2
def test_flat_map_error():
    map_in = f_return(0)

    # This one should fail...
    mapped = f_flat_map(map_in, div10)

    # Now map it through an error handler
    mapped = f_flat_map(mapped, error_fn=lambda ex: f_return(str(ex)))

    result = mapped.result()
    assert "division" in result
Example #3
def test_flat_map_error_fn_raises():
    map_in = f_return(0)

    # This one should fail...
    mapped = f_flat_map(map_in, div10)

    # Now map it through an error handler
    mapped = f_flat_map(mapped, error_fn=raise_str)

    ex = mapped.exception()
    assert "division" in str(ex)
    assert "oops, an error" in str(ex)
    def ensure_product_versions_uptodate(self, ctx):
        # Ensures that the product_versions field contains all the product
        # versions from this cert, in all repos containing this productid as well
        # as any repos sharing a certain relationship.

        # First we need to figure out the repos to handle.
        # We start from the repos we're contained in.
        repo_ids = self.in_pulp_repos
        repo_fs = [ctx.client.get_repository(repo_id) for repo_id in repo_ids]

        # Then start looking up any 'related repos' for them.
        # This finder class manages the searches and avoids duplicate searches.
        finder = RepoFinder(client=ctx.client)
        find_related_fs = [
            f_map(repo_f, finder.find_related) for repo_f in repo_fs
        ]

        # Once all the find_related searches have been set up, we can get the
        # iterable over all repos.
        repo_iter_f = f_map(f_sequence(find_related_fs),
                            lambda _: finder.all_results)

        return f_flat_map(
            repo_iter_f,
            lambda repos: self.ensure_product_versions_uptodate_in_repos(
                ctx, repos),
        )
Example #5
    def _do_upload_file(self, upload_id, file_obj, name):
        def do_next_upload(checksum, size):
            data = file_obj.read(self._CHUNK_SIZE)
            if data:
                if isinstance(data, six.text_type):
                    # if it's unicode, we need to encode before calculating the checksum
                    data = data.encode("utf-8")
                checksum.update(data)
                return f_flat_map(
                    self._do_upload(data, upload_id, size),
                    lambda _: do_next_upload(checksum, size + len(data)),
                )
            # nothing more to upload, return checksum and size
            return f_return((checksum.hexdigest(), size))

        is_file_object = "close" in dir(file_obj)
        if not is_file_object:
            file_obj = open(file_obj, "rb")

        LOG.info("Uploading %s to Pulp", name)
        upload_f = f_flat_map(f_return(),
                              lambda _: do_next_upload(hashlib.sha256(), 0))

        if not is_file_object:
            upload_f.add_done_callback(lambda _: file_obj.close())
        return upload_f
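A condensed sketch of the chunked-upload recursion above (it recurs in several later examples); read_chunk and upload_chunk are hypothetical stand-ins for file_obj.read and self._do_upload:

import hashlib

from more_executors.futures import f_flat_map, f_return

def upload_in_chunks(read_chunk, upload_chunk):
    # read_chunk() -> bytes, empty once the input is exhausted;
    # upload_chunk(data, offset) -> Future resolved when the chunk is stored.
    def do_next_upload(checksum, size):
        data = read_chunk()
        if not data:
            # nothing more to upload; resolve with checksum and size
            return f_return((checksum.hexdigest(), size))
        checksum.update(data)
        return f_flat_map(
            upload_chunk(data, size),
            lambda _: do_next_upload(checksum, size + len(data)),
        )

    return f_flat_map(f_return(), lambda _: do_next_upload(hashlib.sha256(), 0))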
Example #6
    def publish_with_cache_flush(self, repos, units=None, pulp_client=None):
        # Ensure all repos in 'repos' are fully published, and CDN/UD caches are flushed.
        #
        # If 'units' are provided, ensures those units have cdn_published field set after
        # the publish and before the UD cache flush.
        #
        units = units or []
        pulp_client = pulp_client or self.pulp_client

        # publish the repos found
        publish_fs = self.publish(repos)

        # wait for the publish to complete before
        # flushing caches.
        f_sequence(publish_fs).result()

        # hook implementation(s) may now flush pulp-derived caches and datastores
        pm.hook.task_pulp_flush()

        # flush CDN cache
        out = self.flush_cdn(repos)

        # set units as published
        set_published = f_sequence(self.set_cdn_published(units, pulp_client))

        # flush UD cache only after cdn_published is set (if applicable)
        flush_ud = f_flat_map(set_published,
                              lambda _: f_sequence(self.flush_ud(repos)))
        out.append(flush_ud)

        return out
Example #7
    def _do_upload_file(self, upload_id, file_obj, name="<unknown file>"):
        # pylint: disable=unused-argument
        # We keep track of uploaded content as we may need it at import time.
        buffer = six.BytesIO()
        self._uploads_pending[upload_id] = buffer

        is_file_obj = "close" in dir(file_obj)
        if not is_file_obj:
            file_obj = open(file_obj, "rb")

        def do_next_upload(checksum, size):
            while True:
                data = file_obj.read(1024 * 1024)
                if not data:
                    break
                if isinstance(data, six.text_type):
                    data = data.encode("utf-8")
                buffer.write(data)
                checksum.update(data)
                size += len(data)

            return f_return(UploadResult(checksum.hexdigest(), size))

        out = f_flat_map(f_return(),
                         lambda _: do_next_upload(hashlib.sha256(), 0))

        out.add_done_callback(lambda _: file_obj.close())

        return out
Example #8
def handle_results(page):
    for unit in page.data:
        unit = UbiUnit(unit, repo.id)
        units.add(unit)
    if page.next:
        return f_flat_map(page.next, handle_results)
    return f_return(units)
Example #9
    def search_content(self, criteria=None):
        """Search for units across all repositories.

        Args:
            criteria (:class:`~pubtools.pulplib.Criteria`)
                A criteria object used for this search.
                If None, search for all units.

        Returns:
            Future[:class:`~pubtools.pulplib.Page`]
                A future representing the first page of results.

                Each page will contain a collection of
                :class:`~pubtools.pulplib.Unit` subclass objects.

        .. versionadded:: 2.6.0
        """
        # Criteria will be serialized into a Pulp search at the time we
        # actually do the query, but validate eagerly as well so we raise
        # ASAP on invalid input.
        search_for_criteria(criteria, Unit, None)

        if self._server_type_ids is None:
            # We'll be using this in a moment either to set default IDs or to
            # reject searches for invalid types.
            # Note: no locking, so if we're called from multiple threads we in
            # theory might waste some time querying the types more than once.
            self._server_type_ids = self.get_content_type_ids()

        return f_proxy(
            f_flat_map(
                self._server_type_ids,
                lambda ids: self._search_content_with_server_type_ids(
                    criteria, ids),
            ))
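A hedged usage sketch for search_content; the server URL and search field are illustrative only, and .result() is called on the (proxied) futures as elsewhere in these examples:

from pubtools.pulplib import Client, Criteria

client = Client("https://pulp.example.com/")  # hypothetical server URL
page = client.search_content(Criteria.with_field("name", "bash")).result()
while True:
    for unit in page.data:
        print(unit)
    if not page.next:
        break
    page = page.next.result()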
Example #10
    def set_maintenance(self, report):
        """Set maintenance mode for this Pulp server.

        Args:
            report:
                An updated :class:`~pubtools.pulplib.MaintenanceReport` object that
                will be used as the newest maintenance report.
        Returns:
            Future[list[:class:`~pubtools.pulplib.Task`]]
                A future which is resolved when maintenance mode has been updated successfully.

                The future contains the tasks triggered and awaited while
                publishing the maintenance repository.

        .. versionadded:: 1.4.0
        """
        report_json = json.dumps(report._export_dict(),
                                 indent=4,
                                 sort_keys=True)
        report_fileobj = StringIO(report_json)

        repo = self.get_repository("redhat-maintenance").result()

        # upload updated report to repository and publish
        upload_ft = repo.upload_file(report_fileobj, "repos.json")

        return f_flat_map(upload_ft, lambda _: repo.publish())
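A hypothetical caller-side sketch; get_maintenance_report is assumed to be the matching accessor on the same client, and the returned future resolves to the publish tasks:

from pubtools.pulplib import Client

client = Client("https://pulp.example.com/")  # hypothetical server URL
report = client.get_maintenance_report().result()  # assumed accessor
tasks = client.set_maintenance(report).result()
print("publish tasks:", tasks)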
Example #11
def compile_ud_mappings(repo, do_request):
    """Perform the UD mappings note compilation & update process for a given repo.

    Arguments:
        repo (~pulplib.FileRepository)
            A repository.
        do_request (callable)
            A function which can be invoked to perform an HTTP request to Pulp.

    Returns:
        A Future, resolved when the update completes successfully.
    """
    LOG.debug("%s: compiling %s", repo.id, UD_MAPPINGS_NOTE)

    # 1. Get current mappings.
    #
    # Requires a fresh retrieval of the repo since we don't store
    # these mappings on our model.
    #
    repo_url = "pulp/api/v2/repositories/%s/" % repo.id

    repo_raw_f = do_request(repo_url, method="GET")
    mappings_f = f_map(
        repo_raw_f, lambda data:
        (data.get("notes") or {}).get(UD_MAPPINGS_NOTE) or "{}")

    # Mappings are stored as JSON, so decode them
    mappings_f = f_map(mappings_f, json.loads)

    # Wrap them in our helper for keeping track of changes
    mappings_f = f_map(mappings_f, MappingsHelper)

    # 2. Iterate over all files in the repo
    files_f = repo.search_content(Criteria.with_unit_type(FileUnit))

    # 3. Mutate the mappings as needed for each file
    updated_mappings_f = f_flat_map(
        f_zip(mappings_f, files_f),
        lambda tup: update_mappings_for_files(tup[0], tup[1]),
    )

    # 4. Upload them back if any changes
    handle_changes = functools.partial(upload_changed_mappings,
                                       repo=repo,
                                       repo_url=repo_url,
                                       do_request=do_request)
    return f_flat_map(updated_mappings_f, handle_changes)
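Step 3 combines two independent futures with f_zip before flat-mapping. In isolation the pattern looks like this (the values are placeholders):

from more_executors.futures import f_flat_map, f_return, f_zip

mappings_f = f_return({"1.0": ["a.txt"]})
files_f = f_return(["a.txt", "b.txt"])

# f_zip resolves to a tuple of both results; the flat-mapped callable
# unpacks it and returns a new future.
combined_f = f_flat_map(
    f_zip(mappings_f, files_f),
    lambda tup: f_return((tup[0], tup[1])),
)
print(combined_f.result())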
Example #12
    def ensure_uploaded(self, ctx, repo_f=None):
        # Overridden to add the post-upload step of product_versions update.
        uploaded_item = super(PulpProductIdPushItem,
                              self).ensure_uploaded(ctx, repo_f)

        return f_flat_map(
            uploaded_item,
            lambda item: item.ensure_product_versions_uptodate(ctx))
Example #13
def __call__(self, page):
    self._got_data.extend(page.data)
    if page.next:
        # set ourselves up to process the next page.
        # It's important here that we only grab a reference to page.next
        # and don't keep a reference to page, as we are trying to cover
        # a previous bug where that scenario would wrongly cancel search.
        return f_flat_map(page.next, self)
    return f_return(self._got_data)
Example #14
def do_next_upload(checksum, size):
    data = file_obj.read(self._CHUNK_SIZE)
    if data:
        checksum.update(data)
        return f_flat_map(
            self._do_upload(data, upload_id, size),
            lambda _: do_next_upload(checksum, size + len(data)),
        )
    # nothing more to upload, return checksum and size
    return f_return((checksum.hexdigest(), size))
Example #15
def do_next_upload(checksum, size):
    data = file_obj.read(self._CHUNK_SIZE)
    if data:
        if isinstance(data, six.text_type):
            # if it's unicode, we need to encode before calculating the checksum
            data = data.encode("utf-8")
        checksum.update(data)
        return f_flat_map(
            self._do_upload(data, upload_id, size),
            lambda _: do_next_upload(checksum, size + len(data)),
        )
    # nothing more to upload, return checksum and size
    return f_return((checksum.hexdigest(), size))
Example #16
    def set_maintenance(self, report):
        report_json = json.dumps(report._export_dict(),
                                 indent=4,
                                 sort_keys=True)
        report_fileobj = StringIO(report_json)

        repo = self.get_repository("redhat-maintenance").result()

        # upload updated report to repository and publish
        upload_ft = repo.upload_file(report_fileobj, "repos.json")

        publish_ft = f_flat_map(upload_ft, lambda _: repo.publish())
        self._maintenance_report = report_json

        return f_proxy(publish_ft)
Example #17
    def _search_units_per_repos(
        cls, or_criteria, repos, content_type, batch_size_override=None
    ):
        units = []
        for repo in repos:
            units.append(
                cls._search_units(
                    repo,
                    or_criteria,
                    content_type,
                    batch_size_override=batch_size_override,
                )
            )

        return f_proxy(f_flat_map(f_sequence(units), flatten_list_of_sets))
Example #18
    def ensure_uploaded(self, ctx, repo_f=None):
        # ensure_uploaded is overridden to upload to *all* destination repos rather than
        # only one.
        repo_ids = self.pushsource_item.dest

        repo_fs = [ctx.client.get_repository(repo_id) for repo_id in repo_ids]

        upload_fs = [f_flat_map(f, self.upload_to_repo) for f in repo_fs]
        all_uploaded_f = f_sequence(upload_fs)

        # Once uploaded to all repos, as long as those uploads were successful, we'll
        # simply mark ourselves as IN_REPOS without doing any Pulp queries.
        return f_map(
            all_uploaded_f,
            lambda _: attr.evolve(
                self, uploaded_repos=repo_ids, pulp_state=State.IN_REPOS),
        )
Example #19
def update_mappings_for_files(mappings, file_page):
    # Updates mappings for every file in a single page, plus all
    # following pages (async).
    #
    # Returns Future[mappings] once all pages are processed.

    for unit in file_page.data:
        version = unit.version
        if version:
            mappings.set_file_mapping(version, unit.path, unit.display_order)

    if not file_page.next:
        # No more files, just return the mappings
        return f_return(mappings)

    # There are more files; keep going to the next page.
    return f_flat_map(file_page.next,
                      lambda page: update_mappings_for_files(mappings, page))
Example #20
    def _publish_repository(self, repo, distributors_with_config):
        tasks_f = f_return([])

        def do_next_publish(accumulated_tasks, distributor, config):
            distributor_tasks_f = self._publish_distributor(
                repo.id, distributor.id, config)
            return f_map(
                distributor_tasks_f,
                lambda distributor_tasks: accumulated_tasks +
                distributor_tasks,
            )

        for (distributor, config) in distributors_with_config:
            next_publish = partial(do_next_publish,
                                   distributor=distributor,
                                   config=config)
            tasks_f = f_flat_map(tasks_f, next_publish)

        return tasks_f
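The loop above threads an accumulator future through f_flat_map so that distributors publish strictly one after another. A generic sketch of that sequencing pattern, where publish_fns is a hypothetical list of callables each returning Future[list]:

from functools import partial

from more_executors.futures import f_flat_map, f_map, f_return

def run_sequentially(publish_fns):
    tasks_f = f_return([])

    def do_next(accumulated, fn):
        # run the next step only once the previous ones are done, then
        # append its results to those accumulated so far
        return f_map(fn(), lambda out: accumulated + out)

    for fn in publish_fns:
        # partial pins fn now, avoiding the usual late-binding pitfall
        tasks_f = f_flat_map(tasks_f, partial(do_next, fn=fn))
    return tasks_f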
Example #21
    def _do_upload_file(self, upload_id, file_obj, name):
        # pylint: disable=unused-argument
        is_file_obj = "close" in dir(file_obj)
        if not is_file_obj:
            file_obj = open(file_obj, "rb")

        def do_next_upload(checksum, size):
            data = file_obj.read(1024 * 1024)
            if data:
                checksum.update(data)
                size += len(data)
                return do_next_upload(checksum, size)
            return f_return((checksum.hexdigest(), size))

        out = f_flat_map(f_return(),
                         lambda _: do_next_upload(hashlib.sha256(), 0))

        if not is_file_obj:
            out.add_done_callback(lambda _: file_obj.close())

        return out
Example #22
    def _do_upload_file(self, upload_id, file_obj, name):
        def do_next_upload(checksum, size):
            data = file_obj.read(self._CHUNK_SIZE)
            if data:
                checksum.update(data)
                return f_flat_map(
                    self._do_upload(data, upload_id, size),
                    lambda _: do_next_upload(checksum, size + len(data)),
                )
            # nothing more to upload, return checksum and size
            return f_return((checksum.hexdigest(), size))

        is_file_object = "close" in dir(file_obj)
        if not is_file_object:
            file_obj = open(file_obj, "rb")

        LOG.info("Uploading %s to Pulp", name)
        upload_f = f_flat_map(f_return(),
                              lambda _: do_next_upload(hashlib.sha256(), 0))

        if not is_file_object:
            upload_f.add_done_callback(lambda _: file_obj.close())
        return upload_f
Example #23
def publish(self, repo_fs):
    return [
        f_flat_map(f, lambda r: r.publish(PublishOptions(clean=True)))
        for f in repo_fs
    ]
Example #24
    def _upload_then_import(self,
                            file_obj,
                            name,
                            type_id,
                            unit_key_fn=None,
                            unit_metadata_fn=None):
        """Private helper to upload and import a piece of content into this repo.

        To be called by the type-specific subclasses (e.g. YumRepository,
        FileRepository...)

        Args:
            file_obj (str, file-like object, None):
                file object or path (as documented in public methods), or None
                if this unit type has no associated file

            name (str):
                a brief user-meaningful name for the content being uploaded
                (appears in logs)

            type_id (str):
                pulp unit type ID

            unit_key_fn (callable):
                a callable which will be invoked with the return value of
                _do_upload_file (or None if file_obj is None).
                It should return the unit key for this piece of
                content. If omitted, an empty unit key is used, which means Pulp
                is wholly responsible for calculating the unit key.

            unit_metadata_fn (callable):
                a callable which will be invoked with the return value of
                _do_upload_file (or None if file_obj is None). It should return
                the unit metadata for this piece of
                content. If omitted, metadata is not included in the import call to
                Pulp.
        """

        if not self._client:
            raise DetachedException()

        unit_key_fn = unit_key_fn or (lambda _: {})
        unit_metadata_fn = unit_metadata_fn or (lambda _: None)

        upload_id_f = f_map(self._client._request_upload(name),
                            lambda upload: upload["upload_id"])

        f_map(
            upload_id_f,
            lambda upload_id: LOG.info(
                "Uploading %s to %s [%s]", name, self.id, upload_id
            ),
        )

        if file_obj is None:
            # If there is no file for this kind of unit (e.g. erratum),
            # we still have to use the request_upload and import APIs; we just
            # never upload any bytes. That means the upload is 'complete' as
            # soon as the upload ID is known. A real upload returns a (checksum, size)
            # tuple; we force a no-content upload to return None.
            upload_complete_f = f_map(upload_id_f, lambda _: None)
        else:
            upload_complete_f = f_flat_map(
                upload_id_f,
                lambda upload_id: self._client._do_upload_file(
                    upload_id, file_obj, name),
            )

        import_complete_f = f_flat_map(
            upload_complete_f,
            lambda upload: self._client._do_import(
                self.id,
                upload_id_f.result(),
                type_id,
                unit_key_fn(upload),
                unit_metadata_fn(upload),
            ),
        )

        f_map(
            import_complete_f,
            lambda _: self._client._delete_upload_request(
                upload_id_f.result(), name),
        )

        return f_proxy(import_complete_f)
Example #25
def record_clears(self, cleared_repo_fs):
    return [f_flat_map(f, self.record_cleared_repo) for f in cleared_repo_fs]
Example #26
def test_flat_map_nothing():
    map_in = f_return(10)
    mapped = f_flat_map(map_in)
    assert mapped.result() == 10
Example #27
def test_flat_map():
    map_in = f_return(10)
    mapped = f_flat_map(map_in, div10)
    assert mapped.result() == 1
Example #28
def test_flat_map_error():
    with pytest.raises(TypeError):
        f_flat_map("a", lambda x: x)

    with pytest.raises(TypeError):
        f_flat_map(future="a", fn=lambda x: x)
Example #29
    def upload_comps_xml(self, file_obj):
        """Upload a comps.xml file to this repository.

        .. warning::

            Beware of the following quirks with respect to the upload of comps.xml:

            * Pulp does not directly store the uploaded XML. Instead, this library
              parses the XML and uses the content to store various units. The comps
              XML rendered when the yum repository is published is therefore not
              guaranteed to be bytewise-identical to the uploaded content.

            * The uploaded XML must contain all comps data for the repo, as
              any existing comps data will be removed from the repo.

            * The XML parser is not secure against maliciously constructed data.

            * The process of parsing the XML and storing units consists of multiple
              steps which cannot be executed atomically. That means *if this
              operation is interrupted, the repository may be left with incomplete
              data*. It's recommended to avoid publishing a repository in this state.

        Args:
            file_obj (str, file object)
                If it's a string, then it's the path of a comps XML
                file to upload.

                Otherwise, it should be a
                `file-like object <https://docs.python.org/3/glossary.html#term-file-object>`_
                pointing at the bytes of a valid comps.xml file.

                The client takes ownership of this file object; it should
                not be modified elsewhere, and will be closed when upload
                completes.

        Returns:
            Future[list of :class:`~pubtools.pulplib.Task`]
                A future which is resolved after content has been imported
                to this repo.

        Raises:
            DetachedException
                If this instance is not attached to a Pulp client.

        .. versionadded:: 2.17.0
        """
        if isinstance(file_obj, six.string_types):
            file_name = file_obj
            file_obj = open(file_obj, "rb")
        else:
            file_name = getattr(file_obj, "name", "comps.xml")

        # Parse the provided XML. We will crash here if the given XML is not
        # valid.
        with file_obj:
            unit_dicts = comps.units_for_xml(file_obj)

        # Every comps-related unit type has a repo_id which should reference the repo
        # we're uploading to.
        for unit in unit_dicts:
            unit["repo_id"] = self.id

        comps_type_ids = [
            "package_group",
            "package_category",
            "package_environment",
            "package_langpacks",
        ]

        # Remove former units of comps-related types so that the end result is only
        # those units included in the current XML.
        out = self.remove_content(type_ids=comps_type_ids)

        # Once removal is done we can upload each unit.
        upload_f = []
        for unit_dict in unit_dicts:
            type_id = unit_dict["_content_type_id"]

            # For one comps.xml we are doing multiple upload operations, each of
            # which would be logged independently. Come up with some reasonable name
            # for each unit to put into the logs.
            #
            # Example: if uploading my-comps.xml and processing a package_group
            # with id kde-desktop-environment, the name for logging purposes would
            # be: "my-comps.xml [group.kde-desktop-environment]".
            #
            unit_name = type_id.replace("package_", "")
            if unit_dict.get("id"):
                unit_name = "%s.%s" % (unit_name, unit_dict["id"])
            unit_name = "%s [%s]" % (file_name, unit_name)

            upload_f.append(
                f_flat_map(
                    out,
                    self._comps_unit_uploader(unit_name, type_id, unit_dict)))

        # If there were no units to upload then just return the removal.
        if not upload_f:
            return out

        # There were uploads, so we'll wait for all of them to complete and
        # return the tasks from all of them.
        out = f_zip(*upload_f)
        out = f_map(out, lambda uploads: sum(uploads, []))

        return out
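A hedged usage sketch; the server URL and repository id are hypothetical, and the repo is assumed to be of the yum type providing upload_comps_xml:

from pubtools.pulplib import Client

client = Client("https://pulp.example.com/")  # hypothetical server URL
repo = client.get_repository("my-yum-repo").result()  # hypothetical repo id
tasks = repo.upload_comps_xml("my-comps.xml").result()
print("import tasks:", tasks)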
Example #30
    def upload_file(self, file_obj, relative_url=None):
        """Upload a file to this repository.

        Args:
            file_obj (str, file object)
                If it's a string, then it's the path of the file to upload.
                Otherwise, it should be a
                `file-like object <https://docs.python.org/3/glossary.html#term-file-object>`_.


            relative_url (str)
                Path that should be used in remote repository, can either
                be a path to a directory or a path to a file, e.g:

                - if relative_url is 'foo/bar/' and file_obj has name 'f.txt',
                  the resulting remote path will be 'foo/bar/f.txt'.

                - if relative_url is 'foo/bar/f.txt', no matter what the
                  name of file_obj is, the remote path is 'foo/bar/f.txt'.

                If omitted, the local name of the file will be used. Or,
                if file_obj is a file object without a `name` attribute,
                passing `relative_url` is mandatory.

        Returns:
            Future[list of :class:`~pubtools.pulplib.Task`]
                A future which is resolved when import succeeds.

                The future contains the task to import uploaded content
                to repository.

        Raises:
            DetachedException
                If this instance is not attached to a Pulp client.

        .. versionadded:: 1.2.0
        """
        if not self._client:
            raise DetachedException()

        relative_url = self._get_relative_url(file_obj, relative_url)
        name = os.path.basename(relative_url)

        # request upload id and wait for it
        upload_id = self._client._request_upload().result()["upload_id"]

        upload_complete_f = self._client._do_upload_file(
            upload_id, file_obj, name)

        import_complete_f = f_flat_map(
            upload_complete_f,
            lambda upload: self._client._do_import(
                self.id,
                upload_id,
                "iso",
                {
                    "name": relative_url,
                    "checksum": upload[0],
                    "size": upload[1]
                },
            ),
        )

        f_map(import_complete_f,
              lambda _: self._client._delete_upload_request(upload_id))

        return f_proxy(import_complete_f)
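And a matching sketch for upload_file, again with hypothetical identifiers; per the docstring, the remote path here becomes 'foo/bar/f.txt':

from pubtools.pulplib import Client

client = Client("https://pulp.example.com/")  # hypothetical server URL
repo = client.get_repository("my-file-repo").result()  # hypothetical repo id
tasks = repo.upload_file("local/f.txt", relative_url="foo/bar/").result()
print("import tasks:", tasks)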