async def _handle_distribution(self, distribution): log.info( _('Downloading Release file for distribution: "{}"').format( distribution)) # Create release_file if distribution[-1] == "/": release_file_dir = distribution.strip("/") else: release_file_dir = os.path.join("dists", distribution) release_file_dc = DeclarativeContent( content=ReleaseFile(distribution=distribution), d_artifacts=[ self._to_d_artifact(os.path.join(release_file_dir, filename)) for filename in ["Release", "InRelease", "Release.gpg"] ], ) release_file = await self._create_unit(release_file_dc) if release_file is None: return # Create release object release_unit = Release(codename=release_file.codename, suite=release_file.suite, distribution=distribution) release_dc = DeclarativeContent(content=release_unit) release = await self._create_unit(release_dc) # Create release architectures architectures = _filter_split_architectures(release_file.architectures, self.remote.architectures, distribution) for architecture in architectures: release_architecture_dc = DeclarativeContent( content=ReleaseArchitecture(architecture=architecture, release=release)) await self.put(release_architecture_dc) # Parse release file log.info( _('Parsing Release file at distribution="{}"').format( distribution)) release_artifact = await _get_main_artifact_blocking(release_file) release_file_dict = deb822.Release(release_artifact.file) # collect file references in new dict file_references = defaultdict(deb822.Deb822Dict) for digest_name in ["SHA512", "SHA256", "SHA1", "MD5sum"]: if digest_name in release_file_dict: for unit in release_file_dict[digest_name]: file_references[unit["Name"]].update(unit) await asyncio.gather(*[ self._handle_component(component, release, release_file, file_references, architectures) for component in _filter_split_components( release_file.components, self.remote.components, distribution) ])
def find_structured_publish_content(content, src_repo_version):
    """
    Finds the content for structured publish from packages to be copied and returns it all
    together.

    Args:
        content (iterable): Content for structured publish
        src_repo_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns:
        Queryset of Content objects that extends initial set of content for structured publish
    """
    # Content in the source repository version
    package_release_component_ids = src_repo_version.content.filter(
        pulp_type=PackageReleaseComponent.get_pulp_type()
    ).only("pk")
    architecture_ids = src_repo_version.content.filter(
        pulp_type=ReleaseArchitecture.get_pulp_type()
    ).only("pk")
    package_release_components = PackageReleaseComponent.objects.filter(
        pk__in=package_release_component_ids
    )

    structured_publish_content = set()

    # Packages to be copied
    packages = Package.objects.filter(pk__in=content)
    # Materialize the package pks once: used both for the result set and for O(1)
    # membership tests in the loop below (the original `pckg.package in packages`
    # re-scanned the queryset and fetched the related Package row per iteration).
    package_pks = set(packages.values_list("pk", flat=True))
    structured_publish_content.update(package_pks)

    if len(content) != len(package_pks):
        log.warning(_("Additional data with packages is provided. Removing from the content list."))

    # List of all architectures
    architectures = ReleaseArchitecture.objects.filter(pk__in=architecture_ids).values_list(
        "pk", flat=True
    )
    structured_publish_content.update(architectures)

    # Package release components, release components, release to be copied based on packages.
    # select_related avoids two extra queries per row for the FK chain dereferenced below.
    prc_qs = package_release_components.select_related("release_component__release")
    for prc in prc_qs.iterator():
        # Compare by FK id — equivalent to comparing model instances (Django model
        # equality is class + pk) but without loading the related Package.
        if prc.package_id in package_pks:
            structured_publish_content.update(
                [prc.pk, prc.release_component.pk, prc.release_component.release.pk]
            )

    return Content.objects.filter(pk__in=structured_publish_content)
async def _handle_package_index(
    self,
    release_file,
    release_component,
    architecture,
    file_references,
    infix="",
    release=None,
    hybrid_format=False,
):
    """
    Sync one binary package index (Packages file) of a release component/architecture.

    Locates the index files referenced by the Release file, creates a PackageIndex
    content unit, parses every package paragraph, and queues Package (or
    InstallerPackage) units plus their PackageReleaseComponent associations.

    Args:
        release_file: The ReleaseFile content unit this index belongs to.
        release_component: The ReleaseComponent the packages are assigned to.
        architecture: The architecture of this package index (e.g. "amd64").
        file_references: Dict of checksum/size metadata keyed by the relative
            path of each file referenced in the Release file.
        infix: Optional extra path element (e.g. "debian-installer" for udebs).
        release: The Release content unit; used for flat-repo
            ReleaseArchitecture creation.
        hybrid_format: True if the Release file declared
            'No-Support-for-Architecture-all: Packages' — suppresses warnings
            for arch-"all" paragraphs found in arch-specific indices.
    """
    # Create package_index
    release_base_path = os.path.dirname(release_file.relative_path)
    # Package index directory relative to the release file:
    # (flat repos — distribution ending in "/" — keep indices next to the
    # Release file, so the relative dir is empty)
    release_file_package_index_dir = (
        os.path.join(release_component.plain_component, infix,
                     "binary-{}".format(architecture))
        if release_file.distribution[-1] != "/" else "")
    # Package index directory relative to the repository root:
    package_index_dir = os.path.join(release_base_path,
                                     release_file_package_index_dir)
    # Collect every index variant that the Release file actually references.
    d_artifacts = []
    for filename in ["Packages", "Packages.gz", "Packages.xz", "Release"]:
        path = os.path.join(release_file_package_index_dir, filename)
        if path in file_references:
            relative_path = os.path.join(release_base_path, path)
            d_artifacts.append(
                self._to_d_artifact(relative_path, file_references[path]))
    if not d_artifacts:
        # This case will happen if it is not the case that 'path in file_references' for any of
        # ["Packages", "Packages.gz", "Packages.xz", "Release"]. The only case where this is
        # known to occur is when the remote uses 'sync_udebs = True', but the upstream repo does
        # not contain any debian-installer indices.
        message = (
            "Looking for package indices in '{}', but the Release file does not reference any! "
            "Ignoring.")
        log.warning(_(message).format(package_index_dir))
        if "debian-installer" in package_index_dir and self.remote.sync_udebs:
            message = (
                "It looks like the remote is using 'sync_udebs=True', but there is no "
                "installer package index.")
            log.info(_(message))
        return
    relative_path = os.path.join(package_index_dir, "Packages")
    log.info(
        _('Creating PackageIndex unit with relative_path="{}".').format(
            relative_path))
    content_unit = PackageIndex(
        release=release_file,
        component=release_component.component,
        architecture=architecture,
        sha256=d_artifacts[0].artifact.sha256,
        relative_path=relative_path,
    )
    package_index = await self._create_unit(
        DeclarativeContent(content=content_unit, d_artifacts=d_artifacts))
    if not package_index:
        # Missing indices are only tolerated for concrete architectures (never
        # "all"), and only when explicitly allowed via settings or the remote.
        if (settings.FORCE_IGNORE_MISSING_PACKAGE_INDICES
                or self.remote.ignore_missing_package_indices
            ) and architecture != "all":
            message = "No suitable package index files found in '{}'. Skipping."
            log.info(_(message).format(package_index_dir))
            return
        else:
            raise NoPackageIndexFile(relative_dir=package_index_dir)
    # Interpret policy to download Artifacts or not
    deferred_download = self.remote.policy != Remote.IMMEDIATE
    # parse package_index
    package_futures = []
    package_index_artifact = await _get_main_artifact_blocking(
        package_index)
    for package_paragraph in deb822.Packages.iter_paragraphs(
            package_index_artifact.file):
        # Sanity check the architecture from the package paragraph:
        package_paragraph_architecture = package_paragraph["Architecture"]
        if release_file.distribution[-1] == "/":
            # Flat repo: apply the remote's architecture filter per paragraph,
            # since flat repos have no per-architecture index files.
            if (self.remote.architectures
                    and package_paragraph_architecture != "all"
                    and package_paragraph_architecture
                    not in self.remote.architectures.split()):
                message = (
                    "Omitting package '{}' with architecture '{}' from flat repo distribution "
                    "'{}', since we are filtering for architectures '{}'!")
                log.debug(
                    _(message).format(
                        package_paragraph["Filename"],
                        package_paragraph_architecture,
                        release_file.distribution,
                        self.remote.architectures,
                    ))
                continue
        # We drop packages if the package_paragraph_architecture != architecture unless that
        # architecture is "all" in a "mixed" (containing all as well as architecture specific
        # packages) package index:
        elif (package_paragraph_architecture != "all"
              or "all" in release_file.architectures.split()
              ) and package_paragraph_architecture != architecture:
            if not hybrid_format:
                # In hybrid format repos this situation is expected, so the
                # warning is only emitted for non-hybrid repos.
                message = (
                    "The upstream package index in '{}' contains package '{}' with wrong "
                    "architecture '{}'. Skipping!")
                log.warning(
                    _(message).format(
                        package_index_dir,
                        package_paragraph["Filename"],
                        package_paragraph_architecture,
                    ))
            continue
        try:
            package_relpath = os.path.normpath(
                package_paragraph["Filename"])
            # NOTE(review): deb822 paragraph lookups appear to be
            # case-insensitive ("sha256" vs "SHA256") — confirm against the
            # python-debian docs.
            package_sha256 = package_paragraph["sha256"]
            if package_relpath.endswith(".deb"):
                package_class = Package
                serializer_class = Package822Serializer
            elif package_relpath.endswith(".udeb"):
                package_class = InstallerPackage
                serializer_class = InstallerPackage822Serializer
            log.debug(
                _("Downloading package {}").format(
                    package_paragraph["Package"]))
            serializer = serializer_class.from822(data=package_paragraph)
            serializer.is_valid(raise_exception=True)
            package_content_unit = package_class(
                relative_path=package_relpath,
                sha256=package_sha256,
                **serializer.validated_data,
            )
            package_path = os.path.join(self.parsed_url.path,
                                        package_relpath)
            package_da = DeclarativeArtifact(
                artifact=Artifact(size=int(package_paragraph["Size"]),
                                  **_get_checksums(package_paragraph)),
                url=urlunparse(
                    self.parsed_url._replace(path=package_path)),
                relative_path=package_relpath,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            package_dc = DeclarativeContent(content=package_content_unit,
                                            d_artifacts=[package_da])
            package_futures.append(package_dc)
            await self.put(package_dc)
        except KeyError:
            # Paragraphs missing a required field (e.g. Filename/Size) are
            # skipped rather than failing the whole sync.
            log.warning(
                _("Ignoring invalid package paragraph. {}").format(
                    package_paragraph))
    # Assign packages to this release_component
    package_architectures = set([])
    for package_future in package_futures:
        package = await package_future.resolution()
        if not isinstance(package, Package):
            # TODO repeat this for installer packages
            continue
        package_release_component_dc = DeclarativeContent(
            content=PackageReleaseComponent(
                package=package, release_component=release_component))
        await self.put(package_release_component_dc)
        if release_file.distribution[-1] == "/":
            package_architectures.add(package.architecture)
    # For flat repos we may still need to create ReleaseArchitecture content:
    if release_file.distribution[-1] == "/":
        if release_file.architectures:
            # Architectures seen in packages but absent from the ReleaseFile's
            # architectures field still get ReleaseArchitecture units (with a
            # warning).
            for architecture in package_architectures:
                if architecture not in release_file.architectures.split():
                    message = (
                        "The flat repo with distribution '{}' contains packages with "
                        "architecture '{}' but this is not included in the ReleaseFile's "
                        "architectures field '{}'!")
                    log.warning(
                        _(message).format(release_file.distribution,
                                          architecture,
                                          release_file.architectures))
                    message = "Creating additional ReleaseArchitecture for architecture '{}'!"
                    log.warning(_(message).format(architecture))
                    release_architecture_dc = DeclarativeContent(
                        content=ReleaseArchitecture(
                            architecture=architecture, release=release))
                    await self.put(release_architecture_dc)
        else:
            # No architectures declared at all: derive them from the synced
            # packages instead.
            package_architectures_string = " ".join(package_architectures)
            message = (
                "The ReleaseFile of the flat repo with distribution '{}' has an empty "
                "architectures field!")
            log.warning(_(message).format(release_file.distribution))
            message = (
                "Creating ReleaseArchitecture content for architectures '{}', extracted from "
                "the synced packages.")
            log.warning(_(message).format(package_architectures_string))
            for architecture in package_architectures:
                release_architecture_dc = DeclarativeContent(
                    content=ReleaseArchitecture(architecture=architecture,
                                                release=release))
                await self.put(release_architecture_dc)
async def _handle_package_index(
    self,
    release_file,
    release_component,
    architecture,
    file_references,
    infix="",
    release=None,
):
    """
    Sync one binary package index (Packages file) of a release component/architecture.

    Locates the index files referenced by the Release file, creates a PackageIndex
    content unit, parses every package paragraph, and queues Package (or
    InstallerPackage) units plus their PackageReleaseComponent associations.

    Args:
        release_file: The ReleaseFile content unit this index belongs to.
        release_component: The ReleaseComponent the packages are assigned to.
        architecture: The architecture of this package index (e.g. "amd64").
        file_references: Dict of checksum/size metadata keyed by the relative
            path of each file referenced in the Release file.
        infix: Optional extra path element (e.g. "debian-installer" for udebs).
        release: The Release content unit; used for flat-repo
            ReleaseArchitecture creation.
    """
    # Create package_index
    release_base_path = os.path.dirname(release_file.relative_path)
    # For flat repos (distribution ending in "/") the indices live next to the
    # Release file, so the per-component/architecture subdirectory is empty.
    package_index_dir = (os.path.join(release_component.plain_component,
                                      infix,
                                      "binary-{}".format(architecture))
                         if release_file.distribution[-1] != "/" else "")
    # Collect every index variant that the Release file actually references.
    d_artifacts = []
    for filename in ["Packages", "Packages.gz", "Packages.xz", "Release"]:
        path = os.path.join(package_index_dir, filename)
        if path in file_references:
            relative_path = os.path.join(release_base_path, path)
            d_artifacts.append(
                self._to_d_artifact(relative_path, file_references[path]))
    if not d_artifacts:
        log.warning(
            _('No package index file found in "{}"!').format(
                package_index_dir))
        # No package index, nothing to do.
        return
    relative_path = os.path.join(release_base_path, package_index_dir,
                                 "Packages")
    log.info(
        _('Creating PackageIndex unit with relative_path="{}".').format(
            relative_path))
    content_unit = PackageIndex(
        release=release_file,
        component=release_component.component,
        architecture=architecture,
        sha256=d_artifacts[0].artifact.sha256,
        relative_path=relative_path,
    )
    package_index = await self._create_unit(
        DeclarativeContent(content=content_unit, d_artifacts=d_artifacts))
    if not package_index:
        # Missing indices are only tolerated when the remote opts in.
        if self.remote.ignore_missing_package_indices:
            log.info(
                _("No packages index for architecture {}. Skipping.").format(
                    architecture))
            return
        else:
            relative_dir = os.path.join(release_base_path,
                                        package_index_dir)
            raise NoPackageIndexFile(relative_dir=relative_dir)
    # Interpret policy to download Artifacts or not
    deferred_download = self.remote.policy != Remote.IMMEDIATE
    # parse package_index
    package_futures = []
    package_index_artifact = await _get_main_artifact_blocking(
        package_index)
    for package_paragraph in deb822.Packages.iter_paragraphs(
            package_index_artifact.file):
        # Flat repos have no per-architecture index files, so the remote's
        # architecture filter is applied per package paragraph instead.
        if (self.remote.architectures
                and release_file.distribution[-1] == "/"
                and package_paragraph["Architecture"] != "all"
                and package_paragraph["Architecture"]
                not in self.remote.architectures.split()):
            message = (
                "Omitting package '{}' with architecture '{}' from flat repo distribution '{}'"
                ", since we are filtering for architectures '{}'!")
            log.debug(
                _(message).format(
                    package_paragraph["Filename"],
                    package_paragraph["Architecture"],
                    release_file.distribution,
                    self.remote.architectures,
                ))
            continue
        try:
            package_relpath = os.path.normpath(
                package_paragraph["Filename"])
            # NOTE(review): deb822 paragraph lookups appear to be
            # case-insensitive ("sha256" vs "SHA256") — confirm against the
            # python-debian docs.
            package_sha256 = package_paragraph["sha256"]
            if package_relpath.endswith(".deb"):
                package_class = Package
                serializer_class = Package822Serializer
            elif package_relpath.endswith(".udeb"):
                package_class = InstallerPackage
                serializer_class = InstallerPackage822Serializer
            log.debug(
                _("Downloading package {}").format(
                    package_paragraph["Package"]))
            serializer = serializer_class.from822(data=package_paragraph)
            serializer.is_valid(raise_exception=True)
            package_content_unit = package_class(
                relative_path=package_relpath,
                sha256=package_sha256,
                **serializer.validated_data,
            )
            package_path = os.path.join(self.parsed_url.path,
                                        package_relpath)
            package_da = DeclarativeArtifact(
                artifact=Artifact(size=int(package_paragraph["Size"]),
                                  **_get_checksums(package_paragraph)),
                url=urlunparse(
                    self.parsed_url._replace(path=package_path)),
                relative_path=package_relpath,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            package_dc = DeclarativeContent(content=package_content_unit,
                                            d_artifacts=[package_da])
            package_futures.append(package_dc)
            await self.put(package_dc)
        except KeyError:
            # Paragraphs missing a required field (e.g. Filename/Size) are
            # skipped rather than failing the whole sync.
            log.warning(
                _("Ignoring invalid package paragraph. {}").format(
                    package_paragraph))
    # Assign packages to this release_component
    package_architectures = set([])
    for package_future in package_futures:
        package = await package_future.resolution()
        if not isinstance(package, Package):
            # TODO repeat this for installer packages
            continue
        package_release_component_dc = DeclarativeContent(
            content=PackageReleaseComponent(
                package=package, release_component=release_component))
        await self.put(package_release_component_dc)
        if release_file.distribution[-1] == "/":
            package_architectures.add(package.architecture)
    # For flat repos we may still need to create ReleaseArchitecture content:
    if release_file.distribution[-1] == "/":
        if release_file.architectures:
            # Architectures seen in packages but absent from the ReleaseFile's
            # architectures field still get ReleaseArchitecture units (with a
            # warning).
            for architecture in package_architectures:
                if architecture not in release_file.architectures.split():
                    message = (
                        "The flat repo with distribution '{}' contains packages with "
                        "architecture '{}' but this is not included in the ReleaseFile's "
                        "architectures field '{}'!")
                    log.warning(
                        _(message).format(release_file.distribution,
                                          architecture,
                                          release_file.architectures))
                    message = "Creating additional ReleaseArchitecture for architecture '{}'!"
                    log.warning(_(message).format(architecture))
                    release_architecture_dc = DeclarativeContent(
                        content=ReleaseArchitecture(
                            architecture=architecture, release=release))
                    await self.put(release_architecture_dc)
        else:
            # No architectures declared at all: derive them from the synced
            # packages instead.
            package_architectures_string = " ".join(package_architectures)
            message = (
                "The ReleaseFile of the flat repo with distribution '{}' has an empty "
                "architectures field!")
            log.warning(_(message).format(release_file.distribution))
            message = (
                "Creating ReleaseArchitecture content for architectures '{}', extracted from "
                "the synced packages.")
            log.warning(_(message).format(package_architectures_string))
            for architecture in package_architectures:
                release_architecture_dc = DeclarativeContent(
                    content=ReleaseArchitecture(architecture=architecture,
                                                release=release))
                await self.put(release_architecture_dc)
async def _handle_distribution(self, distribution):
    """
    Sync a single distribution of the remote APT repository.

    Downloads and parses the distribution's Release file, creates the Release and
    ReleaseArchitecture content units, detects "hybrid format" repos, and then
    fans out either to flat-repo handling or to per-component handling.

    Args:
        distribution: The distribution string from the remote. A trailing "/"
            indicates a flat repository layout (indices live next to the Release
            file instead of under "dists/<distribution>/").

    Raises:
        UnknownNoSupportForArchitectureAllValue: If the Release file contains a
            'No-Support-for-Architecture-all' field with a value other than
            "Packages" (or empty).
    """
    log.info(
        _('Downloading Release file for distribution: "{}"').format(
            distribution))
    # Create release_file
    # Flat repos keep their Release file directly under the (stripped)
    # distribution path; classic repos keep it under "dists/<distribution>".
    if distribution[-1] == "/":
        release_file_dir = distribution.strip("/")
    else:
        release_file_dir = os.path.join("dists", distribution)
    # Try all three conventional metadata files; _create_unit decides which
    # (if any) were actually retrievable.
    release_file_dc = DeclarativeContent(
        content=ReleaseFile(distribution=distribution),
        d_artifacts=[
            self._to_d_artifact(os.path.join(release_file_dir, filename))
            for filename in ["Release", "InRelease", "Release.gpg"]
        ],
    )
    release_file = await self._create_unit(release_file_dc)
    if release_file is None:
        # No usable Release file — nothing further to sync for this distribution.
        return
    # Create release object
    release_unit = Release(codename=release_file.codename,
                           suite=release_file.suite,
                           distribution=distribution)
    release_dc = DeclarativeContent(content=release_unit)
    release = await self._create_unit(release_dc)
    # Create release architectures
    if release_file.architectures:
        # Restrict to the intersection of the Release file's architectures and
        # any architecture filter configured on the remote.
        architectures = _filter_split_architectures(
            release_file.architectures, self.remote.architectures,
            distribution)
    elif distribution[-1] == "/":
        # Fixed typo in the log message ("architecrures" -> "architectures").
        message = (
            "The ReleaseFile content unit architectures are unset for the flat repo with "
            "distribution '{}'. ReleaseArchitecture content creation is deferred!")
        log.warning(_(message).format(distribution))
        architectures = []
    else:
        # Guard: a non-flat repo whose Release file has an empty architectures
        # field previously left 'architectures' unbound, raising
        # UnboundLocalError in the loop below and in _handle_component.
        architectures = []
    for architecture in architectures:
        release_architecture_dc = DeclarativeContent(
            content=ReleaseArchitecture(architecture=architecture,
                                        release=release))
        await self.put(release_architecture_dc)
    # Parse release file
    log.info(
        _('Parsing Release file at distribution="{}"').format(
            distribution))
    release_artifact = await _get_main_artifact_blocking(release_file)
    release_file_dict = deb822.Release(release_artifact.file)
    # Retrieve and interpret any 'No-Support-for-Architecture-all' value:
    # We will refer to the presence of 'No-Support-for-Architecture-all: Packages' in a Release
    # file as indicating "hybrid format". For more info, see:
    # https://wiki.debian.org/DebianRepository/Format#No-Support-for-Architecture-all
    no_support_for_arch_all = release_file_dict.get(
        "No-Support-for-Architecture-all", "")
    if no_support_for_arch_all.strip() == "Packages":
        hybrid_format = True
    elif not no_support_for_arch_all:
        hybrid_format = False
    else:
        raise UnknownNoSupportForArchitectureAllValue(
            release_file.relative_path, no_support_for_arch_all)
    # collect file references in new dict
    # Merge the per-digest file lists keyed by relative file name, so stronger
    # digests (listed first) are supplemented by weaker ones.
    file_references = defaultdict(deb822.Deb822Dict)
    for digest_name in ["SHA512", "SHA256", "SHA1", "MD5sum"]:
        if digest_name in release_file_dict:
            for unit in release_file_dict[digest_name]:
                file_references[unit["Name"]].update(unit)
    if distribution[-1] == "/":
        # Handle flat repo
        sub_tasks = [
            self._handle_flat_repo(file_references, release_file, release)
        ]
    else:
        # Handle components
        sub_tasks = [
            self._handle_component(
                component,
                release,
                release_file,
                file_references,
                architectures,
                hybrid_format,
            )
            for component in _filter_split_components(
                release_file.components, self.remote.components,
                distribution)
        ]
    await asyncio.gather(*sub_tasks)