def get_affected(self):
    """Return the instruments and types affected by the differences between two mappings.

    The differences come from `self.mapping_diffs()` after filtering with
    `remove_boring()`.  Each difference is walked step by step:

    * a two-element step whose first element is a mapping name has its
      properties looked up with `utils.get_file_properties()`;
    * otherwise, if the final element of the difference is a string reporting
      an added or deleted rmap, that rmap is loaded and its `instrument` and
      `filekind` are used, since new rmaps imply reprocessing the entire type.

    Only steps for which both the instrument and the type are non-blank
    contribute to the result.

    Returns { affected_instrument : [ affected_type, ... ] } where each list of
    affected types is sorted.
    """
    instrs = defaultdict(set)
    diffs = remove_boring(self.mapping_diffs())
    for diff in diffs:
        # Start each diff with blank properties so a leading step which matches
        # neither branch below is ignored instead of raising UnboundLocalError.
        instrument = filekind = ""
        for step in diff:
            # Walking down the diff steps 1-by-1 eventually hits an rmap comparison which
            # will define both instrument and type.  pmaps and imaps leave at least one blank.
            if len(step) == 2 and config.is_mapping(step[0]):
                instrument, filekind = utils.get_file_properties(self.observatory, step[0])
            # This is inefficient since diff doesn't vary by step, but set logic cleans up
            # the redundancy.  New rmaps imply reprocessing the entire type.
            elif (isinstance(diff[-1], str)
                  and diff[-1].startswith(("added", "deleted"))
                  and diff[-1].endswith(".rmap'")):
                rmap_name = diff[-1].split()[-1].replace("'", "")
                rmapping = rmap.fetch_mapping(rmap_name, ignore_checksum=True)
                instrument, filekind = rmapping.instrument, rmapping.filekind
            if instrument.strip() and filekind.strip():
                # Only log the first time a given (instrument, type) pair is seen.
                if filekind not in instrs[instrument]:
                    log.verbose("Affected", (instrument, filekind), "based on diff", diff, verbosity=20)
                instrs[instrument].add(filekind)
    # Sets have no defined order;  sort so the result is deterministic as documented.
    return {key: sorted(val) for (key, val) in instrs.items()}
def properties_inside_mapping(filename):
    """Fetch the mapping `filename` and return its (instrument, filekind) pair.

    Properties which do not apply to the kind of mapping are returned as "".

    >>> properties_inside_mapping('tests/data/roman_0001.pmap')
    ('', '')
    >>> properties_inside_mapping('tests/data/roman_wfi_flat_0004.rmap')
    ('wfi', 'flat')
    >>> properties_inside_mapping('tests/data/roman_wfi_0001.imap')
    ('wfi', '')
    >>> properties_inside_mapping('tests/data/roman_wfi_flat_0004.rmap')
    ('wfi', 'flat')
    """
    loaded = rmap.fetch_mapping(filename)
    # A "pipeline" mapping reports neither property.
    if loaded.mapping == "pipeline":
        return "", ""
    # An "instrument" mapping reports only its instrument.
    if loaded.mapping == "instrument":
        return loaded.instrument, ""
    # Any other mapping reports both properties.
    return loaded.instrument, loaded.filekind
def properties_inside_mapping(filename):
    """Fetch the mapping `filename` and return its (instrument, filekind) pair.

    Properties which do not apply to the kind of mapping are returned as "".

    >>> from crds.tests import test_config
    >>> old_config = test_config.setup()
    >>> properties_inside_mapping("hst.pmap")
    ('', '')
    >>> properties_inside_mapping("hst_acs.imap")
    ('acs', '')
    >>> properties_inside_mapping("hst_acs_darkfile.rmap")
    ('acs', 'darkfile')
    >>> test_config.cleanup(old_config)
    """
    fetched = rmap.fetch_mapping(filename)
    # A "pipeline" mapping reports neither property.
    if fetched.mapping == "pipeline":
        return "", ""
    # An "instrument" mapping reports only its instrument.
    if fetched.mapping == "instrument":
        return fetched.instrument, ""
    # Any other mapping reports both properties.
    return fetched.instrument, fetched.filekind
def get_affected(self):
    """Return the instruments and types affected by the differences between two mappings.

    The differences come from `self.mapping_diffs()` after filtering with
    `remove_boring()`.  Each difference is walked step by step:

    * a two-element step whose first element is a mapping name has its
      properties looked up with `utils.get_file_properties()`;
    * otherwise, if the final element of the difference is a string reporting
      an added or deleted rmap, that rmap is loaded and its `instrument` and
      `filekind` are used, since new rmaps imply reprocessing the entire type.

    Only steps for which both the instrument and the type are non-blank
    contribute to the result.

    Returns { affected_instrument : [ affected_type, ... ] } where each list of
    affected types is sorted.
    """
    instrs = defaultdict(set)
    diffs = remove_boring(self.mapping_diffs())
    for diff in diffs:
        # Start each diff with blank properties so a leading step which matches
        # neither branch below is ignored instead of raising UnboundLocalError.
        instrument = filekind = ""
        for step in diff:
            # Walking down the diff steps 1-by-1 eventually hits an rmap comparison which
            # will define both instrument and type.  pmaps and imaps leave at least one blank.
            if len(step) == 2 and rmap.is_mapping(step[0]):
                instrument, filekind = utils.get_file_properties(self.observatory, step[0])
            # This is inefficient since diff doesn't vary by step, but set logic cleans up
            # the redundancy.  New rmaps imply reprocessing the entire type.
            elif (isinstance(diff[-1], str)
                  and diff[-1].startswith(("added", "deleted"))
                  and diff[-1].endswith(".rmap'")):
                rmap_name = diff[-1].split()[-1].replace("'", "")
                rmapping = rmap.fetch_mapping(rmap_name, ignore_checksum=True)
                instrument, filekind = rmapping.instrument, rmapping.filekind
            if instrument.strip() and filekind.strip():
                # Only log the first time a given (instrument, type) pair is seen.
                if filekind not in instrs[instrument]:
                    log.verbose("Affected", (instrument, filekind), "based on diff", diff, verbosity=20)
                instrs[instrument].add(filekind)
    # Sets have no defined order;  sort so the result is deterministic as documented.
    return {key: sorted(val) for (key, val) in instrs.items()}
def mapping_diffs(self):
    """Compute the logical differences between the mappings `old_file` and `new_file`.

    Both mappings are fetched with checksums ignored and the old mapping's
    `difference()` with the new mapping is returned.

    `self.include_header_diffs` controls whether differences in mapping headers
    are included.  Some of those are "boring", e.g. sha1sum or name.

    `self.recurse_added_deleted` controls whether difference tuples are included
    for all nested adds and deletes whenever a higher level mapping is added or
    deleted.  When off, only the higher level mapping is included, not its
    contained files.
    """
    # At this time, the fetch_mapping path parameter appears to exist only to
    # thwart CRDS mapping caching.
    old_location = self.locate_file1(self.old_file)
    old_map = rmap.fetch_mapping(old_location, ignore_checksum=True, path=self.mappings_cache1)
    new_location = self.locate_file2(self.new_file)
    new_map = rmap.fetch_mapping(new_location, ignore_checksum=True, path=self.mappings_cache2)
    return old_map.difference(
        new_map,
        include_header_diffs=self.include_header_diffs,
        recurse_added_deleted=self.recurse_added_deleted,
    )
def mapping_diffs(self):
    """Compute the logical differences between the mappings `old_file` and `new_file`.

    Both mappings are fetched with checksums ignored and the old mapping's
    `difference()` with the new mapping is returned.

    `self.include_header_diffs` controls whether differences in mapping headers
    are included.  Some of those are "boring", e.g. sha1sum or name.

    `self.recurse_added_deleted` controls whether difference tuples are included
    for all nested adds and deletes whenever a higher level mapping is added or
    deleted.  When off, only the higher level mapping is included, not its
    contained files.
    """
    # At this time, the fetch_mapping path parameter appears to exist only to
    # thwart CRDS mapping caching.
    old_location = self.locate_file1(self.old_file)
    old_map = rmap.fetch_mapping(old_location, ignore_checksum=True, path=self.mappings_cache1)
    new_location = self.locate_file2(self.new_file)
    new_map = rmap.fetch_mapping(new_location, ignore_checksum=True, path=self.mappings_cache2)
    return old_map.difference(
        new_map,
        include_header_diffs=self.include_header_diffs,
        recurse_added_deleted=self.recurse_added_deleted,
    )
def properties_inside_mapping(filename):
    """Fetch the mapping `filename` and return its (instrument, filekind) pair.

    Properties which do not apply to the kind of mapping are returned as "".

    >>> from crds.tests import test_config
    >>> old_config = test_config.setup()
    >>> properties_inside_mapping("hst.pmap")
    ('', '')
    >>> properties_inside_mapping("hst_acs.imap")
    ('acs', '')
    >>> properties_inside_mapping("hst_acs_darkfile.rmap")
    ('acs', 'darkfile')
    >>> test_config.cleanup(old_config)
    """
    fetched = rmap.fetch_mapping(filename)
    # A "pipeline" mapping reports neither property.
    if fetched.mapping == "pipeline":
        return "", ""
    # An "instrument" mapping reports only its instrument.
    if fetched.mapping == "instrument":
        return fetched.instrument, ""
    # Any other mapping reports both properties.
    return fetched.instrument, fetched.filekind
def properties_inside_mapping(filename):
    """Fetch the mapping `filename` and return its (instrument, filekind) pair.

    The mapping's `filekind` is compared with "PIPELINE" and "INSTRUMENT";
    properties which do not apply to the mapping are returned as "".
    """
    loaded = rmap.fetch_mapping(filename)
    # A "PIPELINE" filekind reports neither property.
    if loaded.filekind == "PIPELINE":
        return "", ""
    # An "INSTRUMENT" filekind reports only the instrument.
    if loaded.filekind == "INSTRUMENT":
        return loaded.instrument, ""
    # Any other filekind is reported along with the instrument.
    return loaded.instrument, loaded.filekind
def properties_inside_mapping(filename):
    """Fetch the mapping `filename` and return its (instrument, filekind) pair.

    Properties which do not apply to the kind of mapping are returned as "".
    """
    loaded = rmap.fetch_mapping(filename)
    # A "pipeline" mapping reports neither property.
    if loaded.mapping == "pipeline":
        return "", ""
    # An "instrument" mapping reports only its instrument.
    if loaded.mapping == "instrument":
        return loaded.instrument, ""
    # Any other mapping reports both properties.
    return loaded.instrument, loaded.filekind
def properties_inside_mapping(filename):
    """Fetch the mapping `filename` and return its (instrument, filekind) pair.

    The mapping's `filekind` is compared with "PIPELINE" and "INSTRUMENT";
    properties which do not apply to the mapping are returned as "".
    """
    loaded = rmap.fetch_mapping(filename)
    # A "PIPELINE" filekind reports neither property.
    if loaded.filekind == "PIPELINE":
        return "", ""
    # An "INSTRUMENT" filekind reports only the instrument.
    if loaded.filekind == "INSTRUMENT":
        return loaded.instrument, ""
    # Any other filekind is reported along with the instrument.
    return loaded.instrument, loaded.filekind
def update_derivation(new_path, old_basename=None):
    """Rewrite the 'derived_from' and 'name' header fields of the mapping at `new_path`.

    'name' becomes the basename of `new_path`.  'derived_from' becomes
    `old_basename` when it is given;  otherwise the name the mapping had when
    it was loaded is used.  The mapping is then written back to `new_path`.

    This function works for all Mapping classes:  pmap, imap, and rmap.

    Returns the 'derived_from' value as a str.
    """
    mapping = rmap.fetch_mapping(new_path)
    # With no explicit predecessor, assume the file at new_path is a copy of the
    # old mapping and still has the old mapping's name in its header.
    source_name = mapping.name if old_basename is None else old_basename
    derived_from = str(source_name)
    mapping.header["derived_from"] = derived_from
    mapping.header["name"] = str(os.path.basename(new_path))
    mapping.write(new_path)
    return derived_from
def update_derivation(new_path, old_basename=None):
    """Rewrite the 'derived_from' and 'name' header fields of the mapping at `new_path`.

    'name' becomes the basename of `new_path`.  'derived_from' becomes
    `old_basename` when it is given;  otherwise the name the mapping had when
    it was loaded is used.  The mapping is then written back to `new_path`.

    This function works for all Mapping classes:  pmap, imap, and rmap.

    Returns the 'derived_from' value as a str.
    """
    mapping = rmap.fetch_mapping(new_path)
    # With no explicit predecessor, assume the file at new_path is a copy of the
    # old mapping and still has the old mapping's name in its header.
    source_name = mapping.name if old_basename is None else old_basename
    derived_from = str(source_name)
    mapping.header["derived_from"] = derived_from
    mapping.header["name"] = str(os.path.basename(new_path))
    mapping.write(new_path)
    return derived_from
def rmap_delete_references(old_rmap, new_rmap, deleted_references):
    """Delete all files in `deleted_references` from the rmap at `old_rmap` and
    write out the result to `new_rmap`.

    old_rmap              str            Filepath of the starting rmap
    new_rmap              str            Filepath the updated rmap is written to
    deleted_references    [ str, ... ]   References to delete from the rmap

    `new_rmap` is always written, even when `deleted_references` is empty.  Its
    "derived_from" header field is set to the `basename` of the mapping loaded
    from `old_rmap`.

    After writing, the formatted text of the new rmap is checked and an
    AssertionError is raised if the basename of any deleted reference still
    appears in it.

    Return the updated mapping which was written to `new_rmap`.
    """
    new = old = rmap.fetch_mapping(old_rmap, ignore_checksum=True)
    for reference in deleted_references:
        log.info("Deleting", repr(reference), "from", repr(new.name))
        new = new.delete(reference)
    new.header["derived_from"] = old.basename
    log.verbose("Writing", repr(new_rmap))
    new.write(new_rmap)
    formatted = new.format()
    for reference in deleted_references:
        basename = os.path.basename(reference)
        # Raised explicitly instead of using `assert` so the check is not
        # stripped when Python runs with -O.
        if basename in formatted:
            raise AssertionError(
                "Rules update failure. Deleted " + repr(basename) + " still appears in new rmap.")
    return new
def rmap_insert_references(old_rmap, new_rmap, inserted_references):
    """Insert or replace all files in `inserted_references` in the rmap at
    `old_rmap` and write out the result to `new_rmap`.

    old_rmap               str            Filepath of the starting rmap
    new_rmap               str            Filepath the updated rmap is written to
    inserted_references    [ str, ... ]   Reference filepaths to insert

    `new_rmap` is always written, even when `inserted_references` is empty.  The
    "derived_from" header field is set to the `basename` of the mapping loaded
    from `old_rmap` before any references are inserted.

    Exceptions raised while inserting a reference pass through
    `log.augment_exception()` with the name of the offending reference.

    After writing, the formatted text of the new rmap is checked and an
    AssertionError is raised if the basename of any inserted reference does
    not appear in it.

    Return the updated mapping which was written to `new_rmap`.
    """
    new = old = rmap.fetch_mapping(old_rmap, ignore_checksum=True)
    new.header["derived_from"] = old.basename
    for reference in inserted_references:
        baseref = os.path.basename(reference)
        with log.augment_exception("In reference", srepr(baseref)):
            log.info("Inserting", srepr(baseref), "into", srepr(new.name))
            new = new.insert_reference(reference)
    log.verbose("Writing", srepr(new_rmap))
    new.write(new_rmap)
    formatted = new.format()
    for reference in inserted_references:
        baseref = os.path.basename(reference)
        # Raised explicitly instead of using `assert` so the check is not
        # stripped when Python runs with -O.
        if baseref not in formatted:
            raise AssertionError(
                "Rules update failure. " + srepr(baseref) + " does not appear in new rmap."
                " May be identical match with other submitted references.")
    return new
def rmap_insert_references(old_rmap, new_rmap, inserted_references):
    """Insert or replace all files in `inserted_references` in the rmap at
    `old_rmap` and write out the result to `new_rmap`.

    old_rmap               str            Filepath of the starting rmap
    new_rmap               str            Filepath the updated rmap is written to
    inserted_references    [ str, ... ]   Reference filepaths to insert

    `new_rmap` is always written, even when `inserted_references` is empty.  The
    "derived_from" header field is set to the `basename` of the mapping loaded
    from `old_rmap` before any references are inserted.

    Exceptions raised while inserting a reference pass through
    `log.augment_exception()` with the name of the offending reference.

    After writing, the formatted text of the new rmap is checked and an
    AssertionError is raised if the basename of any inserted reference does
    not appear in it.

    Return the updated mapping which was written to `new_rmap`.
    """
    new = old = rmap.fetch_mapping(old_rmap, ignore_checksum=True)
    new.header["derived_from"] = old.basename
    for reference in inserted_references:
        baseref = os.path.basename(reference)
        with log.augment_exception("In reference", srepr(baseref)):
            log.info("Inserting", srepr(baseref), "into", srepr(new.name))
            new = new.insert_reference(reference)
    log.verbose("Writing", srepr(new_rmap))
    new.write(new_rmap)
    formatted = new.format()
    for reference in inserted_references:
        baseref = os.path.basename(reference)
        # Raised explicitly instead of using `assert` so the check is not
        # stripped when Python runs with -O.
        if baseref not in formatted:
            raise AssertionError(
                "Rules update failure. " + srepr(baseref) + " does not appear in new rmap."
                " May be identical match with other submitted references.")
    return new
def rmap_insert_references(old_rmap, new_rmap, inserted_references): """Given the full path of starting rmap `old_rmap`, modify it by inserting or replacing all files in `inserted_references` and write out the result to `new_rmap`. If no actions are performed, don't write out `new_rmap`. old_rmap str Filepath of source rmap into which `inserted_references` will be inserted new_rmap str Filepath of updated rmap written out inserted_references [ str, ...] List of reference filepaths to be insterted Note that "inserting" a reference file can result in: 1. adding a new match case, 2. adding a new USEAFTER case 3. exactly replacing an existing reference file. Other outcomes are also possible for non-standard rmap selector class configurations. Additional checking: 1. Generates an ERROR if any of the inserted reference files have identical matching criteria since only one file with those criteria would be added to the rmap and the other(s) would be "replaced" by their own insertion set. Note: it is valid/common for an inserted reference to replace a reference which is already in `old_rmap`. This ERROR only applies to equalities within the inserted_references list. 2. Generates a WARNING if the matching criteria of any inserted reference file is a proper subset of inserted or existing references. Thes subsets will generally lead to the addition of new matching cases. Since CRDS inherited instances of these "subset overlaps" from HST CDBS, this warning is only visible with --verbose for HST, they exist. Since this condition is bad both for understanding rmaps and for runtime complexity and performance, for JWST the warning is visible without --verbose and will also generate a runtime ERROR. For JWST there is the expectation that an offending file submission will either be (a) cancelled and corrected or (b) provisionally accepted followed by an immediate manual rmap correction. 
Provisional acceptance gives the option of f keeping the work associated with large deliveries where the corrective measure might be to manually merge overlapping categories with rmap edits. Return None, `new_rmap` is already the implicit result """ new = old = rmap.fetch_mapping(old_rmap, ignore_checksum=True) inserted_cases = {} for reference in inserted_references: log.info("Inserting", os.path.basename(reference), "into", repr(new.name)) new = new.insert_reference(reference) baseref = os.path.basename(reference) with log.warn_on_exception("Failed checking rmap update for", repr(baseref)): cases = new.file_matches(baseref) for fullcase in cases: case = fullcase[1:] if case not in inserted_cases: inserted_cases[case] = baseref else: log.error( "-" * 40 + "\nBoth", srepr(baseref), "and", srepr(inserted_cases[case]), "identically match case:\n", log.PP(case), """ Each reference would replace the other in the rmap. Either reference file matching parameters need correction or additional matching parameters should be added to the rmap to enable CRDS to differentiate between the two files. See the file submission section of the CRDS server user's guide here: https://jwst-crds.stsci.edu/static/users_guide/index.html for more explanation.""")