def print_affected_modes(self):
    """Print out all the affected mode tuples associated with the differences."""
    assert config.is_mapping(self.old_file) and config.is_mapping(self.new_file), \
        "for --print-affected-modes both files must be mappings."
    modes = mapping_affected_modes(self.old_file, self.new_file, self.args.include_header_diffs)
    for affected in modes:
        print(format_affected_mode(affected))
    return 1 if modes else 0
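# CLI sketch for the option checked in the assertion above (invocation shape is
# assumed from the flag name; the context file names are illustrative):
#
#     python -m crds.diff hst_0001.pmap hst_0002.pmap --print-affected-modes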
def __init__(self, *args, **keys):
    super(MappingDifferencer, self).__init__(*args, **keys)
    assert config.is_mapping(self.old_file), \
        "File " + repr(self.old_file) + " is not a CRDS mapping."
    assert config.is_mapping(self.new_file), \
        "File " + repr(self.new_file) + " is not a CRDS mapping."
    assert os.path.splitext(self.old_file)[-1] == os.path.splitext(self.new_file)[-1], \
        "Files " + repr(self.old_file) + " and " + repr(self.new_file) + \
        " are not the same kind of CRDS mapping: .pmap, .imap, .rmap"
def print_all_new_files(self):
    """Print the names of all files which are in `new_file` (or any intermediary
    context) but not in `old_file`.  new_file > old_file.  Both new_file and
    old_file are similar mappings.
    """
    updated = get_updated_files(self.old_file, self.new_file)
    for mapping in updated:
        if config.is_mapping(mapping):
            print(mapping, self.instrument_filekind(mapping))
    for reference in updated:
        if not config.is_mapping(reference):
            print(reference, self.instrument_filekind(reference))
    return 1 if updated else 0
def get_affected(self):
    """Examine the diffs between `old_pmap` and `new_pmap` and return sorted
    lists of affected instruments and types.

    Returns { affected_instrument : { affected_type, ... } }
    """
    instrs = defaultdict(set)
    diffs = self.mapping_diffs()
    diffs = remove_boring(diffs)
    for diff in diffs:
        for step in diff:
            # Walking down the diff steps 1-by-1 eventually hits an rmap comparison
            # which will define both instrument and type.  pmaps and imaps leave at
            # least one blank.
            if len(step) == 2 and config.is_mapping(step[0]):
                instrument, filekind = utils.get_file_properties(self.observatory, step[0])
            # This is inefficient since diff doesn't vary by step, but set logic
            # cleans up the redundancy.  New rmaps imply reprocessing the entire type.
            elif isinstance(diff[-1], str) and diff[-1].startswith(("added", "deleted")) and \
                    diff[-1].endswith(".rmap'"):
                rmap_name = diff[-1].split()[-1].replace("'", "")
                rmapping = rmap.fetch_mapping(rmap_name, ignore_checksum=True)
                instrument, filekind = rmapping.instrument, rmapping.filekind
            if instrument.strip() and filekind.strip():
                if filekind not in instrs[instrument]:
                    log.verbose("Affected", (instrument, filekind), "based on diff", diff, verbosity=20)
                    instrs[instrument].add(filekind)
    return {key: list(val) for (key, val) in instrs.items()}
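# Usage sketch for get_affected() (the `differ` instance is hypothetical; in the
# CRDS diff tool an object of this class is constructed from old/new context names):
#
#     affected = differ.get_affected()
#     for instrument in sorted(affected):
#         print(instrument, "->", sorted(affected[instrument]))
#     # e.g.  acs -> ['biasfile', 'darkfile']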
def get_flex_uri(filename, observatory=None):
    """If environment variables define the base URI for `filename`, append
    filename and return the combined URI.

    If no environment override has been specified, obtain the base URI from
    the server_info config, append filename, and return the combined URI.

    If `filename` is a config file and no environment override is defined,
    return "none".
    """
    if observatory is None:
        observatory = get_default_observatory()
    uri = config.get_uri(filename)
    if uri == "none":
        info = get_server_info()
        if config.is_config(filename):
            uri = _unpack_info(info, "config_url", observatory)
        elif config.is_pickle(filename):
            uri = _unpack_info(info, "pickle_url", observatory)
        elif config.is_mapping(filename):
            uri = _unpack_info(info, "mapping_url", observatory)
        elif config.is_reference(filename):
            uri = _unpack_info(info, "reference_url", observatory)
        else:
            raise CrdsError("Can't identify file type for:", srepr(filename))
        if uri == "none":
            return uri
    if not uri.endswith("/"):
        uri += "/"
    uri += filename
    return uri
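# Usage sketch for get_flex_uri() (assumes CRDS_SERVER_URL points at a reachable
# CRDS server; the file name and resulting URI shape are illustrative):
#
#     uri = get_flex_uri("hst_acs_biasfile_0250.rmap", observatory="hst")
#     # -> "<mapping_url base>/hst_acs_biasfile_0250.rmap"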
def get_file_properties(filename):
    """Figure out (instrument, filekind) based on `filename` which should be
    a mapping or FITS reference file.

    >>> get_file_properties("./hst_acs_biasfile_0001.rmap")
    ('acs', 'biasfile')

    >> get_file_properties("./hst_acs_biasfile_0001.pmap")
    Traceback (most recent call last):
    ...
    IOError: [Errno 2] No such file or directory: './hst_acs_biasfile_0001.pmap'

    >> get_file_properties("test_data/s7g1700gl_dead.fits")
    """
    if data_file.is_geis_data(filename):
        # determine GEIS data file properties from corresponding header file.
        filename = filename[:-1] + "h"
    if config.is_mapping(filename):
        try:
            return decompose_newstyle_name(filename)[2:4]
        except Exception:
            return properties_inside_mapping(filename)
    elif config.is_reference(filename):
        result = get_reference_properties(filename)[2:4]
    else:
        try:
            result = properties_inside_mapping(filename)
        except Exception:
            result = get_reference_properties(filename)[2:4]
    assert result[0] in INSTRUMENTS + [""], \
        "Bad instrument " + repr(result[0]) + " in filename " + repr(filename)
    assert result[1] in FILEKINDS + [""], \
        "Bad filekind " + repr(result[1]) + " in filename " + repr(filename)
    return result
def get_file_properties(filename):
    """Figure out (instrument, filekind) based on `filename` which should be
    a mapping or FITS reference file.

    >> get_file_properties("./hst_acs_biasfile_0001.rmap")
    ('acs', 'biasfile')

    >> get_file_properties("./hst_acs_biasfile_0001.pmap")
    Traceback (most recent call last):
    ...
    AssertionError: Invalid .pmap filename './hst_acs_biasfile_0001.pmap'

    >> get_file_properties("test_data/s7g1700gl_dead.fits")
    """
    if config.is_mapping(filename):
        try:
            return decompose_newstyle_name(filename)[2:4]
        except Exception:
            # NOTE: load_mapping is more conservative than the fetch_mapping
            # used in properties_from_mapping.
            mapping = rmap.load_mapping(filename)
            return mapping.instrument, mapping.filekind
    elif config.is_reference(filename):
        result = get_reference_properties(filename)[2:4]
    else:
        try:
            result = properties_inside_mapping(filename)
        except Exception:
            result = get_reference_properties(filename)[2:4]
    assert result[0] in INSTRUMENTS + [""], \
        "Bad instrument " + repr(result[0]) + " in filename " + repr(filename)
    assert result[1] in FILEKINDS + [""], \
        "Bad filekind " + repr(result[1]) + " in filename " + repr(filename)
    return result
def mapping_pairs(differences):
    """Return the sorted list of all mapping tuples found in differences."""
    pairs = set()
    for diff in differences:
        for pair in diff:
            if len(pair) == 2 and config.is_mapping(pair[0]):
                pairs.add(pair)
    return sorted(pairs)
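# Demo sketch for mapping_pairs() with hypothetical diff data: each difference is
# a tuple of (old, new) mapping pairs followed by a textual action, and the helper
# extracts just the pairs (the action string format below is assumed):
def _demo_mapping_pairs():
    diffs = [
        (("hst.pmap", "hst_0001.pmap"),
         ("hst_acs.imap", "hst_acs_0001.imap"),
         "replaced 'hst_acs.imap' with 'hst_acs_0001.imap'"),
    ]
    print(mapping_pairs(diffs))
    # [('hst.pmap', 'hst_0001.pmap'), ('hst_acs.imap', 'hst_acs_0001.imap')]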
def _find_diff_str(self, diff_str):
    """Return True IFF `diff_str` is in some rmap diff."""
    diffs = self.mapping_diffs()
    diffs = remove_boring(diffs)
    for diff in diffs:
        for step in diff:
            if len(step) == 2 and config.is_mapping(step[0]):
                if diff_str in diff_action(diff):
                    log.verbose("Found", repr(diff_str), "diff between", repr(step[0:2]))
                    return True
    return False
def dump_files(pipeline_context, files, ignore_cache=False, raise_exceptions=True):
    """Unified interface to dump any file in `files`, mapping or reference.

    Returns localpaths, downloads count, bytes downloaded
    """
    if files is None:
        files = get_mapping_names(pipeline_context)
    mappings = [os.path.basename(name) for name in files if config.is_mapping(name)]
    references = [os.path.basename(name) for name in files if not config.is_mapping(name)]
    if mappings:
        m_paths, m_downloads, m_bytes = dump_mappings3(
            pipeline_context, mappings=mappings, ignore_cache=ignore_cache,
            raise_exceptions=raise_exceptions)
    else:
        m_paths, m_downloads, m_bytes = {}, 0, 0
    if references:
        r_paths, r_downloads, r_bytes = dump_references3(
            pipeline_context, baserefs=references, ignore_cache=ignore_cache,
            raise_exceptions=raise_exceptions)
    else:
        r_paths, r_downloads, r_bytes = {}, 0, 0
    return (dict(list(m_paths.items()) + list(r_paths.items())),
            m_downloads + r_downloads,
            m_bytes + r_bytes)
def remove_checksum(file_):
    """Remove checksums from `file_`."""
    log.info("Removing checksum for", repr(file_))
    if config.is_reference(file_):
        data_file.remove_checksum(file_)
    elif config.is_mapping(file_):
        raise exceptions.CrdsError("Mapping checksums cannot be removed for:", repr(file_))
    else:
        raise exceptions.CrdsError(
            "File", repr(file_), "does not appear to be a CRDS reference or mapping file.")
def print_new_files(self):
    """Print the references or mappings which are in the second (new) context
    and not the first (old) context.
    """
    if not config.is_mapping(self.old_file) or not config.is_mapping(self.new_file):
        log.error("--print-new-files really only works for mapping differences.")
        return -1
    old = crds.get_pickled_mapping(self.old_file)   # reviewed
    new = crds.get_pickled_mapping(self.new_file)   # reviewed
    old_mappings = set(old.mapping_names())
    new_mappings = set(new.mapping_names())
    old_references = set(old.reference_names())
    new_references = set(new.reference_names())
    status = 0
    for name in sorted(new_mappings - old_mappings):
        print(name)
        status = 1
    for name in sorted(new_references - old_references):
        print(name)
        status = 1
    return status
def add_checksum(file_):
    """Add checksums to `file_`."""
    log.info("Adding checksum for", repr(file_))
    if config.is_reference(file_):
        with log.error_on_exception("Failed updating checksum for", repr(file_)):
            data_file.add_checksum(file_)
    elif config.is_mapping(file_):
        update_mapping_checksum(file_)
    else:
        raise exceptions.CrdsError(
            "File", repr(file_), "does not appear to be a CRDS reference or mapping file.")
def dump_files(pipeline_context=None, files=None, ignore_cache=False, raise_exceptions=True):
    """Unified interface to dump any file in `files`, mapping or reference.

    Returns localpaths, downloads count, bytes downloaded
    """
    if pipeline_context is None:
        pipeline_context = get_default_context()
    if files is None:
        files = get_mapping_names(pipeline_context)
    mappings = [os.path.basename(name) for name in files if config.is_mapping(name)]
    references = [os.path.basename(name) for name in files if not config.is_mapping(name)]
    if mappings:
        m_paths, m_downloads, m_bytes = dump_mappings3(
            pipeline_context, mappings=mappings, ignore_cache=ignore_cache,
            raise_exceptions=raise_exceptions)
    else:
        m_paths, m_downloads, m_bytes = {}, 0, 0
    if references:
        r_paths, r_downloads, r_bytes = dump_references3(
            pipeline_context, baserefs=references, ignore_cache=ignore_cache,
            raise_exceptions=raise_exceptions)
    else:
        r_paths, r_downloads, r_bytes = {}, 0, 0
    return (dict(list(m_paths.items()) + list(r_paths.items())),
            m_downloads + r_downloads,
            m_bytes + r_bytes)
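# Usage sketch for dump_files() (assumes a configured CRDS cache, i.e. CRDS_PATH
# and CRDS_SERVER_URL are set; the context and file names are illustrative):
#
#     localpaths, downloads, nbytes = dump_files(
#         "hst_0001.pmap", ["hst_acs_biasfile_0250.rmap", "x1v17414j_bia.fits"])
#     print(downloads, "files downloaded,", nbytes, "bytes")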
def get_root_url(filename, observatory=None):
    """Based on the server info, return the base URL the server indicates
    should be used to download `filename`.
    """
    if observatory is None:
        observatory = get_default_observatory()
    info = get_server_info()
    if config.is_mapping(filename):
        url = info["mapping_url"][observatory]
    else:
        url = info["reference_url"][observatory]
    if not url.endswith("/"):
        url += "/"
    return url
def verify_checksum(file_):
    """Verify checksums in `file_`."""
    log.info("Verifying checksum for", repr(file_))
    if config.is_reference(file_):
        data_file.verify_checksum(file_)
    elif config.is_mapping(file_):
        if config.CRDS_IGNORE_MAPPING_CHECKSUM.get():
            log.warning(
                "Mapping checksums are disabled by config.CRDS_IGNORE_MAPPING_CHECKSUM.")
        rmap.load_mapping(file_)
    else:
        raise exceptions.CrdsError(
            "File", repr(file_), "does not appear to be a CRDS reference or mapping file.")
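# Usage sketch tying the three checksum helpers above together (file name is
# illustrative; note remove_checksum() raises CrdsError for mappings by design):
#
#     add_checksum("./x1v17414j_bia.fits")
#     verify_checksum("./x1v17414j_bia.fits")
#     remove_checksum("./x1v17414j_bia.fits")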
def list_tpns(self):
    """Print out the .tpn information related to each of the files either
    specified via the --tpns <files...> argument or implied by any of the
    other standard --list mechanisms like --mappings or --references.
    """
    constrained_files = self.get_words(self.args.tpns) + self.implied_files
    for filename in constrained_files:
        path = self.locate_file(filename)
        if config.is_mapping(path):
            tpn_text = reftypes.get_types_object(
                self.observatory).reference_name_to_ld_tpn_text(path)
        else:
            tpn_text = reftypes.get_types_object(
                self.observatory).reference_name_to_tpn_text(path)
        log.divider(f"Certify constraints for '{path}'")
        print(tpn_text)
def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
    """Check one `file` against the provided CRDS database `info` dictionary."""
    path = config.locate_file(file, observatory=self.observatory)
    base = os.path.basename(file)
    n_bytes = int(info["size"])

    # Only output verification info for slow sha1sum checks by default.
    log.verbose(
        api.file_progress("Verifying", base, path, n_bytes, bytes_so_far,
                          total_bytes, nth_file, total_files),
        verbosity=10 if self.args.check_sha1sum else 60)

    if not os.path.exists(path):
        if base not in self.bad_files:
            log.error("File", repr(base), "doesn't exist at", repr(path))
        return

    # Checks which force repairs should use if/elif so a file isn't repaired twice.
    size = os.stat(path).st_size
    if int(info["size"]) != size:
        self.error_and_repair(path, "File", repr(base),
                              "length mismatch LOCAL size=" + srepr(size),
                              "CRDS size=" + srepr(info["size"]))
    elif self.args.check_sha1sum or config.is_mapping(base):
        log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=60)
        sha1sum = utils.checksum(path)
        if info["sha1sum"] == "none":
            log.warning("CRDS doesn't know the checksum for", repr(base))
        elif info["sha1sum"] != sha1sum:
            self.error_and_repair(path, "File", repr(base),
                                  "checksum mismatch CRDS=" + repr(info["sha1sum"]),
                                  "LOCAL=" + repr(sha1sum))

    if info["state"] not in ["archived", "operational"]:
        log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))

    if info["rejected"] != "false":
        log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
        if self.args.purge_rejected:
            self.remove_files([path], "file")
        return

    if info["blacklisted"] != "false":
        log.verbose_warning("File", repr(base),
                            "has been blacklisted or is dependent on a blacklisted file.",
                            verbosity=60)
        if self.args.purge_blacklisted:
            self.remove_files([path], "file")
        return
def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
    """Check one `file` against the provided CRDS database `info` dictionary."""
    path = rmap.locate_file(file, observatory=self.observatory)
    base = os.path.basename(file)
    n_bytes = int(info["size"])

    # Only output verification info for slow sha1sum checks by default.
    log.verbose(
        api.file_progress("Verifying", base, path, n_bytes, bytes_so_far,
                          total_bytes, nth_file, total_files),
        verbosity=10 if self.args.check_sha1sum else 60)

    if not os.path.exists(path):
        log.error("File", repr(base), "doesn't exist at", repr(path))
        return

    # Checks which force repairs should use if/elif so a file isn't repaired twice.
    size = os.stat(path).st_size
    if int(info["size"]) != size:
        self.error_and_repair(path, "File", repr(base),
                              "length mismatch LOCAL size=" + srepr(size),
                              "CRDS size=" + srepr(info["size"]))
    elif self.args.check_sha1sum or config.is_mapping(base):
        log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=60)
        sha1sum = utils.checksum(path)
        if info["sha1sum"] == "none":
            log.warning("CRDS doesn't know the checksum for", repr(base))
        elif info["sha1sum"] != sha1sum:
            self.error_and_repair(path, "File", repr(base),
                                  "checksum mismatch CRDS=" + repr(info["sha1sum"]),
                                  "LOCAL=" + repr(sha1sum))

    if info["state"] not in ["archived", "operational"]:
        log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))

    if info["rejected"] != "false":
        log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
        if self.args.purge_rejected:
            self.remove_files([path], "files")
        return

    if info["blacklisted"] != "false":
        log.verbose_warning("File", repr(base),
                            "has been blacklisted or is dependent on a blacklisted file.",
                            verbosity=60)
        if self.args.purge_blacklisted:
            self.remove_files([path], "files")
        return
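# Shape sketch of the `info` record both verify_file() variants consult (values
# are illustrative; the keys are the ones the code above actually reads):
#
#     info = {
#         "size": "10240",          # decimal string, compared against os.stat().st_size
#         "sha1sum": "none",        # "none" when the server has no checksum on record
#         "state": "operational",   # expected: "archived" or "operational"
#         "rejected": "false",
#         "blacklisted": "false",
#     }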
def get_file_properties(filename):
    """Figure out (instrument, filekind) based on `filename` which should be
    a mapping or ASDF reference file.

    >>> get_file_properties('tests/data/roman_wfi16_f158_flat_small.asdf')
    ('wfi', 'flat')

    >>> get_file_properties('tests/data/roman_wfi_flat_0004.rmap')
    ('wfi', 'flat')

    >>> get_file_properties('tests/data/roman_0001.pmap')
    ('', '')

    >>> get_file_properties('tests/data/ascii_tab.csv') # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    TypeError: string indices must be integers
    """
    if config.is_mapping(filename):
        try:
            return decompose_newstyle_name(filename)[2:4]
        except Exception:
            # NOTE: load_mapping is more conservative than the fetch_mapping
            # used in properties_from_mapping.
            mapping = rmap.load_mapping(filename)
            return mapping.instrument, mapping.filekind
    elif config.is_reference(filename):
        result = get_reference_properties(filename)[2:4]
    else:
        try:
            result = properties_inside_mapping(filename)
        except Exception:
            result = get_reference_properties(filename)[2:4]
    assert result[0] in INSTRUMENTS + [""], \
        "Bad instrument " + repr(result[0]) + " in filename " + repr(filename)
    assert result[1] in FILEKINDS + [""], \
        "Bad filekind " + repr(result[1]) + " in filename " + repr(filename)
    return result
def archive_url(self, filename):
    """Return the URL used to fetch `filename` from the archive."""
    if config.is_mapping(filename):
        return os.path.join(self.mapping_url, filename)
    else:
        return os.path.join(self.reference_url, filename)
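# Usage sketch for archive_url() (attribute values illustrative).  The method joins
# URLs with os.path.join, which works for these simple "base + name" cases on POSIX,
# though urllib.parse.urljoin is the more general tool:
#
#     # with self.mapping_url == "https://hst-crds.stsci.edu/mappings/hst"
#     # self.archive_url("hst_0001.pmap") -> ".../mappings/hst/hst_0001.pmap"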