def is_optimized_sync(repository, remote, url):
    """
    Check whether it is possible to optimize the synchronization or not.

    Caution: we are not storing when the remote was last updated, so this logic must remain
    in this order: we check the version number first, because changes other than sync could
    have taken place such that the date or repo version differs from the last sync.

    Args:
        repository(RpmRepository): An RpmRepository to check optimization for.
        remote(RpmRemote): An RpmRemote to check optimization for.
        url(str): A remote repository URL.

    Returns:
        bool: True, if sync is optimized; False, otherwise.

    """
    with WorkingDirectory():
        result = get_repomd_file(remote, url)
        if not result:
            return False

        repomd_path = result.path
        repomd = cr.Repomd(repomd_path)
        repomd_checksum = get_sha256(repomd_path)

        is_optimized = (
            repository.last_sync_remote
            and remote.pk == repository.last_sync_remote.pk
            and repository.last_sync_repo_version == repository.latest_version().number
            and remote.pulp_last_updated <= repository.latest_version().pulp_created
            and is_previous_version(repomd.revision, repository.last_sync_revision_number)
            and repository.last_sync_repomd_checksum == repomd_checksum
        )

        if is_optimized:
            optimize_data = dict(message="Optimizing Sync", code="optimizing.sync")
            with ProgressReport(**optimize_data) as optimize_pb:
                optimize_pb.done = 1
                optimize_pb.save()

        return is_optimized
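
For context, a minimal sketch of how a caller might use this check to short-circuit a sync. The wrapper function and its signature here are hypothetical, not part of the source:

# Hypothetical caller sketch -- only is_optimized_sync() is from the source above.
def sync(repository, remote, url, optimize=True):
    """Skip the expensive download/parse stages when nothing changed upstream."""
    if optimize and is_optimized_sync(repository, remote, url):
        # Remote, repo version, revision, and repomd checksum all match the last sync.
        return
    # ... otherwise proceed with a full metadata download and parse ...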
def test_is_previous_version(self):
    """Test version-comparator."""
    # Versions must be int or 1.2.3
    # A non-integer segment compares as False, unless an earlier numeric segment
    # already decides the comparison.
    # True if version <= target

    # None
    self.assertTrue(is_previous_version(None, "1"))
    self.assertTrue(is_previous_version("1", None))
    self.assertTrue(is_previous_version(None, None))

    # Integer versions
    # v = t : v < t : v > t
    self.assertTrue(is_previous_version("1", "1"))
    self.assertTrue(is_previous_version("1", "2"))
    self.assertFalse(is_previous_version("2", "1"))

    # m.n
    # v = t : v m. < t m. : v m.n < t m.n : v m. > t m. : v m.n > t m.n
    self.assertTrue(is_previous_version("1.2", "1.2"))
    self.assertTrue(is_previous_version("1.2", "2.2"))
    self.assertTrue(is_previous_version("1.2", "1.3"))
    self.assertFalse(is_previous_version("2.2", "1.2"))
    self.assertFalse(is_previous_version("2.2", "2.1"))

    # non-numeric : v not-digits : t not-digits : v dot-non-digits : t dot-non-digits
    self.assertFalse(is_previous_version("foo", "1.2"))
    self.assertFalse(is_previous_version("1.2", "bar"))
    self.assertFalse(is_previous_version("foo.2", "2.1"))
    self.assertFalse(is_previous_version("1.2", "bar.1"))
    self.assertTrue(is_previous_version("1.foo", "2.bar"))
    self.assertFalse(is_previous_version("1.foo", "1.bar"))
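
These assertions pin down the comparator's semantics well enough to sketch one possible implementation. This is a reconstruction consistent with every assertion above, not necessarily the shipped code:

def is_previous_version(version, target_version):
    """Return True if version <= target_version; reconstruction matching the test."""
    # None on either side is treated as "previous or equal".
    if version is None or target_version is None:
        return True
    for v, t in zip(str(version).split("."), str(target_version).split(".")):
        # A non-numeric segment makes the pair incomparable.
        if not v.isdigit() or not t.isdigit():
            return False
        # The first differing numeric segment decides the comparison.
        if int(v) != int(t):
            return int(v) < int(t)
    # All compared segments are equal.
    return True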
def resolve_advisory_conflict(previous_advisory, added_advisory):
    """
    Decide which advisory to add to a repo version, create a new one if needed.

    No advisories with the same id can be present in a repo version.

    An existing advisory can be removed from a repo version, a newly added one can stay in a
    repo version, or advisories merge into a newly created one which is added to a repo
    version. Merge is done based on the criteria described below.

    1. If updated_dates and update_version are the same and the pkglist intersection is empty
    (e.g. base repo merged with debuginfo repo) -> a new UpdateRecord content unit with the
    combined pkglist is created.

    2. If updated_dates or update_version differ and the pkglist intersection is non-empty
    (update/re-sync/upload-new case) -> the UpdateRecord with the newer updated_date or
    update_version is added.

    3. If updated_dates differ and the pkglist intersection is empty: ERROR CONDITION
    (e.g. base and debuginfo repos are from different versions, not at the same date)

    4. If update_dates and update_version are the same, and the pkglist intersection is
    non-empty and not equal to either pkglist - ERROR CONDITION! (never-happen case -
    "something is Terribly Wrong Here")

    Args:
        previous_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is in a previous
                                                             repo version
        added_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is being added

    Returns:
        to_add(pulp_rpm.app.models.UpdateRecord): Advisory to add to a repo version, can be
                                                  a newly created one
        to_remove(pulp_rpm.app.models.UpdateRecord): Advisory to remove from a repo version
        to_exclude(pulp_rpm.app.models.UpdateRecord): Advisory to exclude from the added set
                                                      of content for a repo version

    """
    to_add, to_remove, to_exclude = [], [], []

    previous_updated_date = parse_datetime(
        previous_advisory.updated_date or previous_advisory.issued_date)
    added_updated_date = parse_datetime(
        added_advisory.updated_date or added_advisory.issued_date)
    previous_updated_version = previous_advisory.version
    added_updated_version = added_advisory.version
    previous_pkglist = set(previous_advisory.get_pkglist())
    added_pkglist = set(added_advisory.get_pkglist())

    # Prepare results of conditions for easier use.
    same_dates = previous_updated_date == added_updated_date
    same_version = previous_updated_version == added_updated_version
    pkgs_intersection = previous_pkglist.intersection(added_pkglist)

    if same_dates and same_version and pkgs_intersection:
        if previous_pkglist != added_pkglist:
            raise AdvisoryConflict(
                _('Incoming and existing advisories have the same id and '
                  'timestamp but different and intersecting package lists. '
                  'At least one of them is wrong. '
                  f'Advisory id: {previous_advisory.id}'))
        elif previous_pkglist == added_pkglist:
            # it means some advisory metadata changed without bumping the updated_date or
            # version. There is no way to find out which one is newer, and a user can't fix
            # it, so we are choosing the incoming advisory.
            to_remove.append(previous_advisory.pk)
    elif (not same_dates and not pkgs_intersection) or \
            (same_dates and not same_version and not pkgs_intersection):
        raise AdvisoryConflict(
            _('Incoming and existing advisories have the same id but '
              'different timestamps and non-intersecting package lists. It is '
              'likely that they are from two different incompatible remote '
              'repositories. E.g. RHELX-repo and RHELY-debuginfo repo. '
              'Ensure that you are adding content for the compatible '
              f'repositories. Advisory id: {previous_advisory.id}'))
    elif not same_dates and pkgs_intersection:
        if previous_updated_date < added_updated_date:
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif not same_version and pkgs_intersection:
        if is_previous_version(previous_updated_version, added_updated_version):
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif same_dates and same_version and not pkgs_intersection:
        # previous_advisory is used to copy the object and thus the variable refers to a
        # different object after the `merge_advisories` call
        previous_advisory_pk = previous_advisory.pk
        merged_advisory = merge_advisories(previous_advisory, added_advisory)
        to_add.append(merged_advisory.pk)
        to_remove.append(previous_advisory_pk)
        to_exclude.append(added_advisory.pk)

    return to_add, to_remove, to_exclude
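
Despite the docstring's singular phrasing, the function returns three lists of pks. A hedged sketch of how a repo-version builder might consume them; the wrapper and the three set names are hypothetical:

# Hypothetical consumer sketch -- only resolve_advisory_conflict() is from the source.
def apply_conflict_resolution(previous_advisory, added_advisory,
                              pks_to_add, pks_to_remove, pks_to_exclude):
    """Fold the three pk lists into the sets a repo-version builder would track."""
    to_add, to_remove, to_exclude = resolve_advisory_conflict(previous_advisory,
                                                              added_advisory)
    pks_to_add.update(to_add)          # merged advisory, if one was created
    pks_to_remove.update(to_remove)    # superseded advisory from the previous version
    pks_to_exclude.update(to_exclude)  # incoming advisory that must not be added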
async def run(self):
    """
    Build `DeclarativeContent` from the repodata.
    """
    remote_url = self.new_url or self.remote.url
    remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"
    optimize_sync = self.optimize

    progress_data = dict(message='Downloading Metadata Files', code='downloading.metadata')
    with ProgressReport(**progress_data) as metadata_pb:
        downloader = self.remote.get_downloader(
            url=urljoin(remote_url, 'repodata/repomd.xml'))
        # TODO: decide how to distinguish between a mirror list and a normal repo
        result = await downloader.run()
        metadata_pb.increment()

        repomd_path = result.path
        repomd = cr.Repomd(repomd_path)

        # Caution: we are not storing when the remote was last updated, so this logic must
        # remain in this order: we check the version number first, because changes other
        # than sync could have taken place such that the date or repo version differs from
        # the last sync.
        if (optimize_sync and
                self.repository.last_sync_remote and
                self.remote.pk == self.repository.last_sync_remote.pk and
                (self.repository.last_sync_repo_version ==
                    self.repository.latest_version().number) and
                (self.remote.pulp_last_updated <=
                    self.repository.latest_version().pulp_created) and
                is_previous_version(
                    repomd.revision, self.repository.last_sync_revision_number)):
            optimize_data = dict(message='Optimizing Sync', code='optimizing.sync')
            with ProgressReport(**optimize_data) as optimize_pb:
                optimize_pb.done = 1
                optimize_pb.save()
            return

        self.repository.last_sync_revision_number = repomd.revision

        if self.treeinfo:
            d_artifacts = [
                DeclarativeArtifact(
                    artifact=Artifact(),
                    url=urljoin(remote_url, self.treeinfo["filename"]),
                    relative_path=".treeinfo",
                    remote=self.remote,
                    deferred_download=False,
                )
            ]
            for path, checksum in self.treeinfo["download"]["images"].items():
                artifact = Artifact(**checksum)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=urljoin(remote_url, path),
                    relative_path=path,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                d_artifacts.append(da)

            distribution_tree = DistributionTree(**self.treeinfo["distribution_tree"])
            dc = DeclarativeContent(content=distribution_tree, d_artifacts=d_artifacts)
            dc.extra_data = self.treeinfo
            await self.put(dc)

        package_repodata_urls = {}
        downloaders = []
        modulemd_list = list()
        dc_groups = []
        dc_categories = []
        dc_environments = []
        nevra_to_module = defaultdict(dict)
        pkgname_to_groups = defaultdict(list)
        group_to_categories = defaultdict(list)
        group_to_environments = defaultdict(list)
        optionalgroup_to_environments = defaultdict(list)
        modulemd_results = None
        comps_downloader = None
        main_types = set()
        checksums = {}

        for record in repomd.records:
            checksums[record.type] = record.checksum_type.upper()
            if record.type in PACKAGE_REPODATA:
                main_types.update([record.type])
                package_repodata_urls[record.type] = urljoin(
                    remote_url, record.location_href)
            elif record.type in UPDATE_REPODATA:
                updateinfo_url = urljoin(remote_url, record.location_href)
                downloader = self.remote.get_downloader(url=updateinfo_url)
                downloaders.append([downloader.run()])
            elif record.type in COMPS_REPODATA:
                comps_url = urljoin(remote_url, record.location_href)
                comps_downloader = self.remote.get_downloader(url=comps_url)
            elif record.type in SKIP_REPODATA:
                continue
            elif '_zck' in record.type:
                continue
            elif record.type in MODULAR_REPODATA:
                modules_url = urljoin(remote_url, record.location_href)
                modulemd_downloader = self.remote.get_downloader(url=modules_url)
                modulemd_results = await modulemd_downloader.run()
            elif record.type not in PACKAGE_DB_REPODATA:
                file_data = {record.checksum_type: record.checksum, "size": record.size}
                da = DeclarativeArtifact(
                    artifact=Artifact(**file_data),
                    url=urljoin(remote_url, record.location_href),
                    relative_path=record.location_href,
                    remote=self.remote,
                    deferred_download=False,
                )
                repo_metadata_file = RepoMetadataFile(
                    data_type=record.type,
                    checksum_type=record.checksum_type,
                    checksum=record.checksum,
                )
                dc = DeclarativeContent(content=repo_metadata_file, d_artifacts=[da])
                await self.put(dc)

        missing_type = set(PACKAGE_REPODATA) - main_types
        if missing_type:
            raise FileNotFoundError(
                _("XML file(s): {filename} not found").format(
                    filename=", ".join(missing_type)))

        self.repository.original_checksum_types = checksums

        # we have to sync module.yaml first if it exists, to make relations to packages
        if modulemd_results:
            modulemd_index = mmdlib.ModuleIndex.new()
            open_func = gzip.open if modulemd_results.url.endswith('.gz') else open
            with open_func(modulemd_results.path, 'r') as moduleyaml:
                content = moduleyaml.read()
                module_content = content if isinstance(content, str) else content.decode()
                modulemd_index.update_from_string(module_content, True)

            modulemd_names = modulemd_index.get_module_names() or []
            modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

            # Parsing modules happens all at one time, and from here on no useful work
            # happens. So just report that it finished this stage.
            modulemd_pb_data = {'message': 'Parsed Modulemd', 'code': 'parsing.modulemds'}
            with ProgressReport(**modulemd_pb_data) as modulemd_pb:
                modulemd_total = len(modulemd_all)
                modulemd_pb.total = modulemd_total
                modulemd_pb.done = modulemd_total

            for modulemd in modulemd_all:
                artifact = modulemd.pop('artifact')
                relative_path = '{}{}{}{}{}snippet'.format(
                    modulemd[PULP_MODULE_ATTR.NAME],
                    modulemd[PULP_MODULE_ATTR.STREAM],
                    modulemd[PULP_MODULE_ATTR.VERSION],
                    modulemd[PULP_MODULE_ATTR.CONTEXT],
                    modulemd[PULP_MODULE_ATTR.ARCH],
                )
                da = DeclarativeArtifact(
                    artifact=artifact, relative_path=relative_path, url=modules_url)
                modulemd_content = Modulemd(**modulemd)
                dc = DeclarativeContent(content=modulemd_content, d_artifacts=[da])
                dc.extra_data = defaultdict(list)

                # dc.content.artifacts are Modulemd artifacts
                for artifact in dc.content.artifacts:
                    nevra_to_module.setdefault(artifact, set()).add(dc)
                modulemd_list.append(dc)

            # delete list now that we're done with it for memory savings
            del modulemd_all

            modulemd_default_names = parse_defaults(modulemd_index)

            # Parsing module-defaults happens all at one time, and from here on no useful
            # work happens. So just report that it finished this stage.
            modulemd_defaults_pb_data = {
                'message': 'Parsed Modulemd-defaults', 'code': 'parsing.modulemd_defaults'}
            with ProgressReport(**modulemd_defaults_pb_data) as modulemd_defaults_pb:
                modulemd_defaults_total = len(modulemd_default_names)
                modulemd_defaults_pb.total = modulemd_defaults_total
                modulemd_defaults_pb.done = modulemd_defaults_total

            for default in modulemd_default_names:
                artifact = default.pop('artifact')
                relative_path = '{}{}snippet'.format(
                    default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                    default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                da = DeclarativeArtifact(
                    artifact=artifact, relative_path=relative_path, url=modules_url)
                default_content = ModulemdDefaults(**default)
                dc = DeclarativeContent(content=default_content, d_artifacts=[da])
                await self.put(dc)

            # delete list now that we're done with it for memory savings
            del modulemd_default_names

        if comps_downloader:
            comps_result = await comps_downloader.run()

            comps = libcomps.Comps()
            comps.fromxml_f(comps_result.path)

            with ProgressReport(message='Parsed Comps', code='parsing.comps') as comps_pb:
                comps_total = (len(comps.groups) + len(comps.categories)
                               + len(comps.environments))
                comps_pb.total = comps_total
                comps_pb.done = comps_total

            if comps.langpacks:
                langpack_dict = PackageLangpacks.libcomps_to_dict(comps.langpacks)
                packagelangpack = PackageLangpacks(
                    matches=strdict_to_dict(comps.langpacks),
                    digest=dict_digest(langpack_dict))
                dc = DeclarativeContent(content=packagelangpack)
                dc.extra_data = defaultdict(list)
                await self.put(dc)

            if comps.categories:
                for category in comps.categories:
                    category_dict = PackageCategory.libcomps_to_dict(category)
                    category_dict['digest'] = dict_digest(category_dict)
                    packagecategory = PackageCategory(**category_dict)
                    dc = DeclarativeContent(content=packagecategory)
                    dc.extra_data = defaultdict(list)

                    if packagecategory.group_ids:
                        for group_id in packagecategory.group_ids:
                            group_to_categories[group_id['name']].append(dc)
                    dc_categories.append(dc)

            if comps.environments:
                for environment in comps.environments:
                    environment_dict = PackageEnvironment.libcomps_to_dict(environment)
                    environment_dict['digest'] = dict_digest(environment_dict)
                    packageenvironment = PackageEnvironment(**environment_dict)
                    dc = DeclarativeContent(content=packageenvironment)
                    dc.extra_data = defaultdict(list)

                    if packageenvironment.option_ids:
                        for option_id in packageenvironment.option_ids:
                            optionalgroup_to_environments[option_id['name']].append(dc)

                    if packageenvironment.group_ids:
                        for group_id in packageenvironment.group_ids:
                            group_to_environments[group_id['name']].append(dc)
                    dc_environments.append(dc)

            if comps.groups:
                for group in comps.groups:
                    group_dict = PackageGroup.libcomps_to_dict(group)
                    group_dict['digest'] = dict_digest(group_dict)
                    packagegroup = PackageGroup(**group_dict)
                    dc = DeclarativeContent(content=packagegroup)
                    dc.extra_data = defaultdict(list)

                    if packagegroup.packages:
                        for package in packagegroup.packages:
                            pkgname_to_groups[package['name']].append(dc)

                    if dc.content.id in group_to_categories.keys():
                        for dc_category in group_to_categories[dc.content.id]:
                            dc.extra_data['category_relations'].append(dc_category)
                            dc_category.extra_data['packagegroups'].append(dc)

                    if dc.content.id in group_to_environments.keys():
                        for dc_environment in group_to_environments[dc.content.id]:
                            dc.extra_data['environment_relations'].append(dc_environment)
                            dc_environment.extra_data['packagegroups'].append(dc)

                    if dc.content.id in optionalgroup_to_environments.keys():
                        for dc_environment in optionalgroup_to_environments[dc.content.id]:
                            dc.extra_data['env_relations_optional'].append(dc_environment)
                            dc_environment.extra_data['optionalgroups'].append(dc)
                    dc_groups.append(dc)

            for dc_category in dc_categories:
                await self.put(dc_category)

            for dc_environment in dc_environments:
                await self.put(dc_environment)

            # delete lists now that we're done with them for memory savings
            del dc_environments
            del dc_categories

        # to preserve order, downloaders are created after all repodata urls are identified
        package_repodata_downloaders = []
        for repodata_type in PACKAGE_REPODATA:
            downloader = self.remote.get_downloader(
                url=package_repodata_urls[repodata_type])
            package_repodata_downloaders.append(downloader.run())

        downloaders.append(package_repodata_downloaders)

        # asyncio.gather is used to preserve the order of results for package repodata
        pending = [asyncio.gather(*downloaders_group) for downloaders_group in downloaders]

        while pending:
            done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
            for downloader in done:
                try:
                    results = downloader.result()
                except ClientResponseError as exc:
                    raise HTTPNotFound(reason=_("File not found: {filename}").format(
                        filename=exc.request_info.url))

                if results[0].url == package_repodata_urls['primary']:
                    primary_xml_path = results[0].path
                    filelists_xml_path = results[1].path
                    other_xml_path = results[2].path
                    metadata_pb.done += 3
                    metadata_pb.save()

                    packages = await RpmFirstStage.parse_repodata(
                        primary_xml_path, filelists_xml_path, other_xml_path)

                    # skip SRPM if defined
                    if 'srpm' in self.skip_types:
                        packages = {
                            pkgId: pkg for pkgId, pkg in packages.items()
                            if pkg.arch != 'src'
                        }

                    progress_data = {
                        'message': 'Parsed Packages',
                        'code': 'parsing.packages',
                        'total': len(packages),
                    }
                    with ProgressReport(**progress_data) as packages_pb:
                        for pkg in packages.values():
                            package = Package(**Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(remote_url, package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download,
                            )
                            dc = DeclarativeContent(content=package, d_artifacts=[da])
                            dc.extra_data = defaultdict(list)

                            # find if a package relates to a modulemd
                            if dc.content.nevra in nevra_to_module.keys():
                                dc.content.is_modular = True
                                for dc_modulemd in nevra_to_module[dc.content.nevra]:
                                    dc.extra_data['modulemd_relation'].append(dc_modulemd)
                                    dc_modulemd.extra_data['package_relation'].append(dc)

                            if dc.content.name in pkgname_to_groups.keys():
                                for dc_group in pkgname_to_groups[dc.content.name]:
                                    dc.extra_data['group_relations'].append(dc_group)
                                    dc_group.extra_data['related_packages'].append(dc)

                            packages_pb.increment()
                            await self.put(dc)

                elif results[0].url == updateinfo_url:
                    updateinfo_xml_path = results[0].path
                    metadata_pb.increment()

                    updates = await RpmFirstStage.parse_updateinfo(updateinfo_xml_path)

                    progress_data = {
                        'message': 'Parsed Advisories',
                        'code': 'parsing.advisories',
                        'total': len(updates),
                    }
                    with ProgressReport(**progress_data) as advisories_pb:
                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = hash_update_record(update)
                            future_relations = {
                                'collections': defaultdict(list), 'references': []}

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    future_relations['collections'][coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            advisories_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

        # now send modules down the pipeline since all relations have been set up
        for modulemd in modulemd_list:
            await self.put(modulemd)

        for dc_group in dc_groups:
            await self.put(dc_group)
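
The gather-inside-wait pattern above is what makes the `results[0]`/`results[1]`/`results[2]` indexing safe: `asyncio.gather` preserves result order within a group (primary, filelists, other stay lined up), while `asyncio.wait` lets whole groups complete in any order. A standalone illustration, independent of the Pulp classes:

import asyncio


async def fetch(name, delay):
    await asyncio.sleep(delay)
    return name


async def main():
    # Each inner gather keeps its results in submission order;
    # wait() yields the gathered groups as they complete.
    pending = {
        asyncio.gather(fetch("primary", 0.2), fetch("filelists", 0.1),
                       fetch("other", 0.3)),
        asyncio.gather(fetch("updateinfo", 0.05)),
    }
    while pending:
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            # e.g. ['updateinfo'] first, then ['primary', 'filelists', 'other']
            print(task.result())


asyncio.run(main())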
def resolve_advisory_conflict(previous_advisory, added_advisory):
    """
    Decide which advisory to add to a repo version, create a new one if needed.

    No advisories with the same id can be present in a repo version.

    An existing advisory can be removed from a repo version, a newly added one can stay in a
    repo version, or advisories merge into a newly created one which is added to a repo
    version. Merge is done based on the criteria described below.

    1. If updated_dates and update_version are the same and the pkglist intersection is empty
    (e.g. base repo merged with debuginfo repo) -> a new UpdateRecord content unit with the
    combined pkglist is created.

    2. If updated_dates or update_version differ and the pkglist intersection is non-empty
    (update/re-sync/upload-new case) -> the UpdateRecord with the newer updated_date or
    update_version is added.

    3. If updated_dates differ and the pkglist intersection is empty:
       3.a If the pkglists differ only in EVR (i.e. the name-intersection is not empty)
           -> use the newer one
       3.b else -> ERROR CONDITION (e.g. base and debuginfo repos are from different
           versions, not at the same date)

    4. If update_dates and update_version are the same, and the pkglist intersection is
    non-empty and not a proper subset of either pkglist - ERROR CONDITION! (never-happen
    case - "something is Terribly Wrong Here")

    Args:
        previous_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is in a previous
                                                             repo version
        added_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is being added

    Returns:
        to_add(list): UUIDs of advisories to add to a repo version, can be newly created ones
        to_remove(list): UUIDs of advisories to remove from a repo version
        to_exclude(list): UUIDs of advisories to exclude from the added set of content for a
                          repo version

    """
    def _datetime_heuristics(in_str):
        # issue- and update-dates can be datetimes, empty, or timestamps. Alas.
        # Try to Do The Right Thing.
        # Return None if we give up.
        if not in_str:
            return None
        dt = parse_datetime(in_str)
        if not dt:
            try:
                tstamp = int(in_str)
                dt = datetime.fromtimestamp(tstamp)
            except:  # noqa
                # No idea what this is - give up and return None
                return None
        return dt

    def _do_merge():
        # previous_advisory is used to copy the object and thus the variable refers to a
        # different object after the `merge_advisories` call
        previous_advisory_pk = previous_advisory.pk
        merged_advisory = merge_advisories(previous_advisory, added_advisory)
        to_add.append(merged_advisory.pk)
        to_remove.append(previous_advisory_pk)
        to_exclude.append(added_advisory.pk)

    def _name_intersect(prev_pkgs, new_pkgs):
        prev_names = set([x[0] for x in prev_pkgs])
        new_names = set([x[0] for x in new_pkgs])
        return prev_names.intersection(new_names)

    to_add, to_remove, to_exclude = [], [], []

    previous_updated_date = _datetime_heuristics(
        previous_advisory.updated_date or previous_advisory.issued_date)
    added_updated_date = _datetime_heuristics(
        added_advisory.updated_date or added_advisory.issued_date)
    previous_updated_version = previous_advisory.version
    added_updated_version = added_advisory.version
    previous_pkglist = set(previous_advisory.get_pkglist())
    added_pkglist = set(added_advisory.get_pkglist())

    # Prepare results of conditions for easier use.
    same_dates = previous_updated_date == added_updated_date
    same_version = previous_updated_version == added_updated_version
    pkgs_intersection = previous_pkglist.intersection(added_pkglist)
    names_intersection = _name_intersect(previous_pkglist, added_pkglist)

    if same_dates and same_version and pkgs_intersection:
        if previous_pkglist != added_pkglist:
            # prev and new have different pkg-lists. See if one is a proper subset of the
            # other; if so, choose the one with the *larger* pkglist. Otherwise, error.
            if previous_pkglist < added_pkglist:
                # new has more pkgs - remove previous
                to_remove.append(previous_advisory.pk)
            elif added_pkglist < previous_pkglist:
                # prev has more pkgs - exclude new
                to_exclude.append(added_advisory.pk)
            else:
                if settings.ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION:
                    _do_merge()
                else:
                    raise AdvisoryConflict(
                        _("Incoming and existing advisories have the same id and timestamp "
                          "but different and intersecting package lists, "
                          "and neither package list is a proper subset of the other. "
                          "At least one of the advisories is wrong. "
                          "To allow this behavior, set "
                          "ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION = True "
                          "(q.v.) in your configuration. Advisory id: {}").format(
                              previous_advisory.id))
        elif previous_pkglist == added_pkglist:
            # it means some advisory metadata changed without bumping the updated_date or
            # version. There is no way to find out which one is newer, and a user can't fix
            # it, so we are choosing the incoming advisory.
            to_remove.append(previous_advisory.pk)
    elif (not same_dates or (same_dates and not same_version)) and not pkgs_intersection:
        if names_intersection or settings.ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION:
            # Keep the "newer" advisory
            if not same_dates:
                if previous_updated_date < added_updated_date:
                    to_remove.append(previous_advisory.pk)
                else:
                    to_exclude.append(added_advisory.pk)
            elif not same_version:
                if is_previous_version(previous_updated_version, added_updated_version):
                    to_remove.append(previous_advisory.pk)
                else:
                    to_exclude.append(added_advisory.pk)
        else:
            raise AdvisoryConflict(
                _("Incoming and existing advisories have the same id but "
                  "different timestamps and non-intersecting package lists. "
                  "It is likely that they are from two different incompatible remote "
                  "repositories. E.g. RHELX-repo and RHELY-debuginfo repo. "
                  "Ensure that you are adding content for the compatible repositories. "
                  "To allow this behavior, set "
                  "ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION = True (q.v.) "
                  "in your configuration. Advisory id: {}").format(previous_advisory.id))
    elif not same_dates and pkgs_intersection:
        if previous_updated_date < added_updated_date:
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif not same_version and pkgs_intersection:
        if is_previous_version(previous_updated_version, added_updated_version):
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif same_dates and same_version and not pkgs_intersection:
        _do_merge()

    return to_add, to_remove, to_exclude
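
The timestamp heuristic is worth seeing in isolation. A standalone equivalent of the nested `_datetime_heuristics` helper, assuming `parse_datetime` here is `django.utils.dateparse.parse_datetime` (an assumption; the module's actual import is not shown in this excerpt):

from datetime import datetime

from django.utils.dateparse import parse_datetime


def datetime_heuristics(in_str):
    """Standalone sketch of the nested _datetime_heuristics helper."""
    if not in_str:
        return None                    # empty or None: nothing to parse
    dt = parse_datetime(in_str)        # ISO-8601, e.g. "2021-03-02T14:00:00"
    if not dt:
        try:
            # Epoch seconds, e.g. "1614693600"
            dt = datetime.fromtimestamp(int(in_str))
        except (TypeError, ValueError):
            return None                # neither format: give up
    return dt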
def resolve_advisory_conflict(previous_advisory, added_advisory):
    """
    Decide which advisory to add to a repo version, create a new one if needed.

    No advisories with the same id can be present in a repo version.

    An existing advisory can be removed from a repo version, a newly added one can stay in a
    repo version, or advisories merge into a newly created one which is added to a repo
    version. Merge is done based on the criteria described below.

    1. If updated_dates and update_version are the same and the pkglist intersection is empty
    (e.g. base repo merged with debuginfo repo) -> a new UpdateRecord content unit with the
    combined pkglist is created.

    2. If updated_dates or update_version differ and the pkglist intersection is non-empty
    (update/re-sync/upload-new case) -> the UpdateRecord with the newer updated_date or
    update_version is added.

    3. If updated_dates differ and the pkglist intersection is empty: ERROR CONDITION
    (e.g. base and debuginfo repos are from different versions, not at the same date)

    4. If update_dates and update_version are the same, and the pkglist intersection is
    non-empty and not equal to either pkglist - ERROR CONDITION! (never-happen case -
    "something is Terribly Wrong Here")

    Args:
        previous_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is in a previous
                                                             repo version
        added_advisory(pulp_rpm.app.models.UpdateRecord): Advisory which is being added

    Returns:
        to_add(pulp_rpm.app.models.UpdateRecord): Advisory to add to a repo version, can be
                                                  a newly created one
        to_remove(pulp_rpm.app.models.UpdateRecord): Advisory to remove from a repo version
        to_exclude(pulp_rpm.app.models.UpdateRecord): Advisory to exclude from the added set
                                                      of content for a repo version

    """
    def _do_merge():
        # previous_advisory is used to copy the object and thus the variable refers to a
        # different object after the `merge_advisories` call
        previous_advisory_pk = previous_advisory.pk
        merged_advisory = merge_advisories(previous_advisory, added_advisory)
        to_add.append(merged_advisory.pk)
        to_remove.append(previous_advisory_pk)
        to_exclude.append(added_advisory.pk)

    to_add, to_remove, to_exclude = [], [], []

    previous_updated_date = parse_datetime(
        previous_advisory.updated_date or previous_advisory.issued_date)
    added_updated_date = parse_datetime(
        added_advisory.updated_date or added_advisory.issued_date)
    previous_updated_version = previous_advisory.version
    added_updated_version = added_advisory.version
    previous_pkglist = set(previous_advisory.get_pkglist())
    added_pkglist = set(added_advisory.get_pkglist())

    # Prepare results of conditions for easier use.
    same_dates = previous_updated_date == added_updated_date
    same_version = previous_updated_version == added_updated_version
    pkgs_intersection = previous_pkglist.intersection(added_pkglist)

    if same_dates and same_version and pkgs_intersection:
        if previous_pkglist != added_pkglist:
            # prev and new have different pkg-lists. See if one is a proper subset of the
            # other; if so, choose the one with the *larger* pkglist. Otherwise, error.
            if previous_pkglist < added_pkglist:
                # new has more pkgs - remove previous
                to_remove.append(previous_advisory.pk)
            elif added_pkglist < previous_pkglist:
                # prev has more pkgs - exclude new
                to_exclude.append(added_advisory.pk)
            else:
                if settings.ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION:
                    _do_merge()
                else:
                    raise AdvisoryConflict(
                        _("Incoming and existing advisories have the same id and timestamp "
                          "but different and intersecting package lists, "
                          "and neither package list is a proper subset of the other. "
                          "At least one of the advisories is wrong. "
                          "Advisory id: {}").format(previous_advisory.id))
        elif previous_pkglist == added_pkglist:
            # it means some advisory metadata changed without bumping the updated_date or
            # version. There is no way to find out which one is newer, and a user can't fix
            # it, so we are choosing the incoming advisory.
            to_remove.append(previous_advisory.pk)
    elif (not same_dates or (same_dates and not same_version)) and not pkgs_intersection:
        if settings.ALLOW_AUTOMATIC_UNSAFE_ADVISORY_CONFLICT_RESOLUTION:
            to_remove.append(previous_advisory.pk)
        else:
            raise AdvisoryConflict(
                _("Incoming and existing advisories have the same id but "
                  "different timestamps and non-intersecting package lists. "
                  "It is likely that they are from two different incompatible remote "
                  "repositories. E.g. RHELX-repo and RHELY-debuginfo repo. "
                  "Ensure that you are adding content for the compatible repositories. "
                  "Advisory id: {}").format(previous_advisory.id))
    elif not same_dates and pkgs_intersection:
        if previous_updated_date < added_updated_date:
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif not same_version and pkgs_intersection:
        if is_previous_version(previous_updated_version, added_updated_version):
            to_remove.append(previous_advisory.pk)
        else:
            to_exclude.append(added_advisory.pk)
    elif same_dates and same_version and not pkgs_intersection:
        _do_merge()

    return to_add, to_remove, to_exclude