Example #1
File: sync.py Project: nden/crds
 def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
     """Check one `file` against the provided CRDS database `info` dictionary."""
     path = rmap.locate_file(file, observatory=self.observatory)
     base = os.path.basename(file)
     n_bytes = int(info["size"])
     log.verbose(api.file_progress("Verifying", base, path, n_bytes, bytes_so_far, total_bytes, nth_file, total_files),
                 verbosity=10)
     if not os.path.exists(path):
         log.error("File", repr(base), "doesn't exist at", repr(path))
         return
     size = os.stat(path).st_size
     if int(info["size"]) != size:
         self.error_and_repair(path, "File", repr(base), "length mismatch LOCAL size=" + srepr(size), 
                               "CRDS size=" + srepr(info["size"]))
     elif self.args.check_sha1sum:
         log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=100)
         sha1sum = utils.checksum(path)
         if info["sha1sum"] == "none":
             log.warning("CRDS doesn't know the checksum for", repr(base))
         elif info["sha1sum"] != sha1sum:
             self.error_and_repair(path, "File", repr(base), "checksum mismatch CRDS=" + repr(info["sha1sum"]), 
                                   "LOCAL=" + repr(sha1sum))
     if info["state"] not in ["archived", "operational"]:
         log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))
     if info["rejected"] != "false":
         log.warning("File", repr(base), "has been explicitly rejected.")
         if self.args.purge_rejected:
             self.remove_files([path], "files")
         return
     if info["blacklisted"] != "false":
         log.warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.")
         if self.args.purge_blacklisted:
             self.remove_files([path], "files")
         return
     return
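
A minimal standalone sketch of the same size-and-checksum verification pattern, using only the standard library; the `expected` dict shape loosely mirrors the CRDS `info` record but is assumed here for illustration:

import hashlib
import os

def verify_local_file(path, expected):
    """Return a list of problem strings found comparing `path` against the
    `expected` dict with "size" and "sha1sum" keys."""
    if not os.path.exists(path):
        return ["missing: " + path]
    problems = []
    size = os.stat(path).st_size
    if int(expected["size"]) != size:
        problems.append("size mismatch: local=%d expected=%s" % (size, expected["size"]))
    sha1 = hashlib.sha1()
    with open(path, "rb") as handle:
        for block in iter(lambda: handle.read(2 ** 20), b""):  # read in 1M chunks
            sha1.update(block)
    if expected.get("sha1sum", "none") not in ("none", sha1.hexdigest()):
        problems.append("sha1 mismatch: local=" + sha1.hexdigest())
    return problems
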
Example #2
File: sync.py Project: nden/crds
 def sync_datasets(self):
     """Sync mappings and references for datasets with respect to `self.contexts`."""
     if not self.contexts:
         log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.""")
         sys.exit(-1)
     active_references = []
     for context in self.contexts:
         if self.args.dataset_ids:
             if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                 with open(self.args.dataset_ids[0][1:]) as pfile:
                     self.args.dataset_ids = pfile.read().splitlines()
             with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                 id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
         for dataset in self.args.dataset_files or self.args.dataset_ids:
             log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
             with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                 if self.args.dataset_files:
                     headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                 else:
                     headers = { dataset_id : header for (dataset_id, header) in id_headers.items() if
                                 dataset.upper() in dataset_id }
                 for assc_dataset, header in headers.items():
                     with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset), 
                                                 "under context", repr(context)):   
                         bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory, 
                                                            ignore_cache=self.args.ignore_cache)
                         log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                         active_references.extend(bestrefs.values())
     active_references = [ ref for ref in active_references if not ref.startswith("NOT FOUND") ]
     log.verbose("Syncing references:", repr(active_references))
     return list(set(active_references))
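
Note the `@` convention above: a single --dataset-ids argument of the form `@ids.txt` is expanded to the ids listed one per line in that file. A sketch of just that expansion (the function name is assumed):

def expand_id_args(dataset_ids):
    """If `dataset_ids` is a single "@<path>" argument, replace it with the
    ids listed one per line in <path>; otherwise return it unchanged."""
    if len(dataset_ids) == 1 and dataset_ids[0].startswith("@"):
        with open(dataset_ids[0][1:]) as listing:
            return listing.read().splitlines()
    return dataset_ids
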
Example #3
File: sync.py Project: nden/crds
 def verify_context_change(self, old_context):
     """Verify that the starting and post-sync contexts are different,  or issue an error."""
     new_context = heavy_client.load_server_info(self.observatory).operational_context
     if old_context == new_context:
         log.error("Expected operational context switch but starting and post-sync contexts are both", repr(old_context))
     else:
         log.info("Operational context updated from", repr(old_context), "to",  repr(new_context))
Example #4
File: list.py Project: stscirij/crds
 def list_dataset_headers(self):
     """List dataset header info for self.args.dataset_headers with respect to self.args.context"""
     for context in self.contexts:
         with log.error_on_exception("Failed fetching dataset parameters with repect to", repr(context), 
                                     "for", repr(self.args.dataset_headers)):
             pars = api.get_dataset_headers_by_id(context, self.args.dataset_headers)
             pmap = crds.get_cached_mapping(context)
             for requested_id in self.args.dataset_headers:
                 for returned_id in sorted(pars.keys()):
                     if requested_id.upper() in returned_id.upper():
                         header = pars[returned_id]
                         if isinstance(header, python23.string_types):
                             log.error("No header for", repr(returned_id), ":", repr(header)) # header is reason
                             continue
                         if self.args.id_expansions_only:
                             print(returned_id, context if len(self.contexts) > 1 else "")
                         else:
                             if self.args.minimize_headers:
                                 header2 = pmap.minimize_header(header)
                             else:
                                 header2 = dict(header)
                             header2.pop("REFTYPE", None)
                             header2["dataset_id"] = returned_id
                             log.info("Dataset pars for", repr(returned_id), "with respect to", repr(context) + ":\n",
                                      log.PP(header2))
                         if self.args.first_id_expansion_only:
                             break
Example #5
def rmap_check_modifications(old_rmap, new_rmap, old_ref, new_ref, expected=("add",)):
    """Check the differences between `old_rmap` and `new_rmap` and make sure they're
    limited to the types listed in `expected`.
    
    expected should be "add" or "replace".
    
    Returns as_expected,  True IFF all rmap modifications match `expected`.
    """
    diffs = diff.mapping_diffs(old_rmap, new_rmap)
    as_expected = True
    for difference in diffs:
        actual = diff.diff_action(difference)
        if actual in expected:
            pass   # white-list so it will fail when expected is bogus.
        else:
            log.error("Expected one of", repr(expected), "but got", repr(actual),
                      "from change", repr(difference))
            as_expected = False
    with open(old_rmap) as pfile:
        old_count = len([line for line in pfile.readlines() if os.path.basename(old_ref) in line])
    with open(new_rmap) as pfile:
        new_count = len([line for line in pfile.readlines() if os.path.basename(new_ref) in line])
    if "replace" in expected and old_count != new_count:
        log.error("Replacement COUNT DIFFERENCE replacing", repr(old_ref), "with", repr(new_ref), "in", repr(old_rmap),
                  old_count, "vs.", new_count)
        as_expected = False
    return as_expected
Example #6
File: refactor2.py Project: nden/crds
def rmap_check_modifications(old_rmap, new_rmap, old_ref, new_ref, expected=("add",)):
    """Check the differences between `old_rmap` and `new_rmap` and make sure they're
    limited to the types listed in `expected`.
    
    expected should be "add" or "replace".
    
    Returns as_expected,  True IFF all rmap modifications match `expected`.
    """
    diffs = diff.mapping_diffs(old_rmap, new_rmap)
    as_expected = True
    for difference in diffs:
        actual = diff.diff_action(difference)
        if actual in expected:
            pass   # white-list so it will fail when expected is bogus.
        else:
            log.error("Expected one of", srepr(expected), "but got", srepr(actual),
                      "from change", srepr(difference))
            as_expected = False
    with open(old_rmap) as pfile:
        old_count = len([line for line in pfile.readlines() if os.path.basename(old_ref) in line])
    with open(new_rmap) as pfile:
        new_count = len([line for line in pfile.readlines() if os.path.basename(new_ref) in line])
    if "replace" in expected and old_count != new_count:
        log.error("Replacement COUNT DIFFERENCE replacing", srepr(old_ref), "with", 
                  srepr(new_ref), "in", srepr(old_rmap),
                  old_count, "vs.", new_count)
        as_expected = False
    return as_expected
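
A hypothetical call, e.g. from a test, checking that a reference replacement changed nothing but the replaced filename; the .rmap and .fits names here are placeholders, not files from the source:

ok = rmap_check_modifications(
    "old.rmap", "new.rmap", "old_ref.fits", "new_ref.fits", expected=("replace",))
assert ok, "rmap differences were not limited to replacements"
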
Example #7
File: api.py Project: nden/crds
def get_total_bytes(info_map):
    """Return the total byte count of file info map `info_map`."""
    try:
        return sum([python23.long(info_map[name]["size"]) for name in info_map if "NOT FOUND" not in info_map[name]])
    except Exception as exc:
        log.error("Error computing total byte count: ", str(exc))
        return -1
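
For illustration, an assumed `info_map` with sizes stored as strings and a failed lookup flagged by a "NOT FOUND" string value; only the dict-valued entries contribute to the total:

info_map = {
    "a_dead.fits": {"size": "4480", "state": "archived"},
    "b_flat.fits": {"size": "10240", "state": "operational"},
    "missing.fits": "NOT FOUND file is not known to CRDS",
}
assert get_total_bytes(info_map) == 14720
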
Example #8
File: api.py Project: nden/crds
def _get_cache_filelist_and_report_errors(bestrefs):
    """Compute the list of files to download based on the `bestrefs` dictionary,
    skimming off and reporting errors, and raising an exception on the last error seen.

    Return the list of files to download,  collapsing complex return types like tuples
    and dictionaries into a list of simple filenames.
    """
    wanted = []
    last_error = None
    for filetype, refname in bestrefs.items():
        if isinstance(refname, tuple):
            wanted.extend(list(refname))
        elif isinstance(refname, dict):
            wanted.extend(refname.values())
        elif isinstance(refname, python23.string_types):
            if "NOT FOUND" in refname:
                if "n/a" in refname.lower():
                    log.verbose("Reference type", repr(filetype),
                                "NOT FOUND.  Skipping reference caching/download.")
                else:
                    last_error = CrdsLookupError("Error determining best reference for",
                                                 repr(str(filetype)), " = ", str(refname)[len("NOT FOUND"):])
                    log.error(str(last_error))
            else:
                wanted.append(refname)
        else:
            last_error = CrdsLookupError("Unhandled bestrefs return value type for " + repr(str(filetype)))
            log.error(str(last_error))
    if last_error is not None:
        raise last_error
    return wanted
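
An assumed `bestrefs` input showing how the mixed return types collapse to a flat list of filenames; the reference types and file names are invented for illustration:

bestrefs = {
    "deadtab": "x_dead.fits",              # plain filename, kept
    "pctab": ("pc1.fits", "pc2.fits"),     # tuple, flattened
    "tracetab": "NOT FOUND n/a",           # n/a lookup, silently skipped
}
wanted = _get_cache_filelist_and_report_errors(bestrefs)
# wanted == ["x_dead.fits", "pc1.fits", "pc2.fits"]  (dict insertion order)
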
Example #9
File: sync.py Project: nden/crds
 def error_and_repair(self, file, *args, **keys):
     """Issue an error message and repair `file` if requested by command line args."""
     log.error(*args, **keys)
     if self.args.repair_files:
         if config.writable_cache_or_info("Skipping remove and dump of", repr(file)):
             log.info("Repairing file", repr(file))
             utils.remove(file, observatory=self.observatory)
             self.dump_files(self.default_context, [file]) 
Example #10
 def verify_archive_file(self, filename):
     """Verify the likely presence of `filename` on the archive web server.  Issue an ERROR if absent."""
     url = self.archive_url(filename)
     response = requests.head(url)
     if response.status_code in [200,]:
         log.verbose("File", repr(filename), "is available from", repr(url))
         self.check_length(filename, response)
     else:
         log.error("File", repr(filename), "failed HTTP HEAD with code =", response.status_code, "from", repr(url))
         self.missing_files.append(filename)
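
The same HEAD probe reduced to a self-contained sketch, assuming only `requests`; the base URL handling and function name are illustrative:

import requests

def head_check(base_url, filename, expected_size):
    """HEAD probe `filename` under `base_url`; return True when the file
    answers 200 and its content-length matches `expected_size`."""
    url = base_url.rstrip("/") + "/" + filename
    response = requests.head(url, timeout=30)
    if response.status_code != 200:
        return False
    return int(response.headers.get("content-length", -1)) == int(expected_size)
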
Example #11
 def check_length(self, filename, response):
     """Check the content-length reported by HEAD against the CRDS database's file size."""
     archive_size = python23.long(response.headers["content-length"])
     crds_size = python23.long(self.file_info[filename]["size"])
     if archive_size != crds_size:
         log.error("File", repr(filename), "available but length bad.  crds size:", crds_size,
                   "archive size:", archive_size)
         self.bad_length_files.append(filename)
     else:
         log.verbose("File", repr(filename), "lengths agree:", crds_size)
Example #12
File: web.py Project: stscirij/crds
 def _check_error(self, response, xpath_spec, error_prefix):
     """Extract the `xpath_spec` text from `response`,  if present call fatal_error() with
     `error_prefix` and the response `xpath_spec` text.
     """
     error_msg_parse = html.fromstring(response.text).xpath(xpath_spec)
     error_message = error_msg_parse and error_msg_parse[0].text.strip()
     if error_message:
         if error_message.startswith("ERROR: "):
             error_message = error_message[len("ERROR: "):]
         log.error(error_prefix, error_message)
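
For context, `xpath_spec` is an XPath expression locating the server's error element. A self-contained illustration with `lxml`; the markup and the class name in the spec are invented:

from lxml import html

page = '<html><body><div class="error_message">ERROR: bad parameters</div></body></html>'
matches = html.fromstring(page).xpath("//div[@class='error_message']")
message = matches and matches[0].text.strip()
# message == "ERROR: bad parameters";  the "ERROR: " prefix is stripped before logging.
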
Example #13
 def ignore_errors(self, i, affected):
     """Check each context switch for errors during bestrefs run. Fail or return False on errors."""
     ignore = False
     if affected.bestrefs_status != 0:
         message = log.format("CRDS server-side errors for", i, affected.computation_dir)
         if self.args.ignore_errant_history:
             ignore = True
         if self.args.fail_on_errant_history:
             self.fatal_error(message)
         else:
             log.error(message)
     return ignore          
Example #14
 def handle_done(self, message):
     """Generic "done" handler issue info() message and stops monitoring / exits."""
     status = message.data["status"]
     if status == 0:
         log.info(self.format_remote("COMPLETED:", message.data))
     elif status == 1:
         log.error(self.format_remote("FAILED:", message.data))
     elif status == 2:
         log.error(self.format_remote("CANCELLED:", message.data))
     else:
         log.info(self.format_remote("DONE:", message))
     return message.data["result"]
Example #15
File: cmdline.py Project: stscirij/crds
 def log_and_track_error(self, data, instrument, filekind, *params, **keys):
     """Issue an error message and record the first instance of each unique kind of error,  where "unique"
     is defined as (instrument, filekind, msg_text) and omits data id.
     """
     msg = self.format_prefix(data, instrument, filekind, *params, **keys)
     log.error(msg)
     key = log.format(instrument, filekind, *params, **keys)
     if key not in self.ue_mixin.messages:
         self.ue_mixin.messages[key] = msg
         self.ue_mixin.unique_data_names.add(data)
     self.ue_mixin.count[key] += 1
     self.ue_mixin.all_data_names.add(data)
     return None # for log.exception_trap_logger  --> don't reraise
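
The bookkeeping pattern in isolation: count every occurrence of each error key, but record the full message and data id only for its first occurrence. A minimal sketch (the class and attribute names are assumptions, not the CRDS mixin):

from collections import Counter

class ErrorTracker:
    """Track unique error keys, their counts, and the data ids involved."""

    def __init__(self):
        self.messages = {}                # key -> first full message
        self.counts = Counter()           # key -> number of occurrences
        self.unique_data_names = set()    # ids which first exhibited a key
        self.all_data_names = set()       # every id with any error

    def track(self, data, key, msg):
        if key not in self.messages:
            self.messages[key] = msg
            self.unique_data_names.add(data)
        self.counts[key] += 1
        self.all_data_names.add(data)
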
Example #16
 def main(self):
     """Process command line parameters in to a context and list of
     reference files.   Print out the match tuples within the context
     which contain the reference files.
     """
     if self.matched_files:
         self.dump_reference_matches()
     elif self.args.datasets or self.args.instrument:
         self.dump_dataset_headers()
     else:
         self.print_help()
         log.error("Specify --files to dump reference match cases or --datasets to dump dataset matching parameters.")
     return log.errors()
Example #17
File: monitor.py Project: nden/crds
 def handle_done(self, message):
     """Generic "done" handler issue info() message and stops monitoring / exits."""
     status = message.data["status"]
     result = message.data.get("result", None)
     if status == 0:
         log.info(self.format_remote("COMPLETED:", result))
     elif status == 1:
         log.error(self.format_remote("FAILED:", result))
     elif status == 2:
         log.error(self.format_remote("CANCELLED:", result))
     else:
         log.info(self.format_remote("DONE:", result))
     self.result = result
     return result
Example #18
 def list_datasets(self):
     """List dataset header info for self.args.datasets with respect to self.args.context"""
     for context in self.contexts:
         with log.error_on_exception("Failed fetching dataset parameters with repect to", repr(context), 
                                     "for", repr(self.args.datasets)):
             pars = api.get_dataset_headers_by_id(context, self.args.datasets)
             pmap = rmap.get_cached_mapping(context)
             for (dataset_id, header) in pars.items():
                 if isinstance(header, python23.string_types):
                     log.error("No header for", repr(dataset_id), ":", repr(header)) # header is reason
                     continue
                 header2 = pmap.minimize_header(header)
                 header2.pop("REFTYPE", None)
                 log.info("Dataset pars for", repr(dataset_id), "with respect to", repr(context) + ":\n",
                          log.PP(header2))
Example #19
 def main(self):
     """Synchronize files."""
     if self.args.dry_run:
         self.args.readonly_cache = True
     if self.args.repair_files:
         self.args.check_files = True
     if self.args.organize:   # do this before syncing anything under the current mode.
         self.organize_references(self.args.organize)
     self.require_server_connection()
     if self.readonly_cache and self.args.verify_context_change:
         log.error("--readonly-cache and --verify-context-change are incompatible,  a readonly cache cannot change.")
     if self.args.files:
         self.sync_explicit_files()
         verify_file_list = self.files
     elif self.args.fetch_sqlite_db:
         self.fetch_sqlite_db()
     elif self.contexts:
         active_mappings = self.get_context_mappings()
         verify_file_list = active_mappings
         if self.args.fetch_references or self.args.purge_references:
             if self.args.dataset_files or self.args.dataset_ids:
                 active_references = self.sync_datasets()
             else:
                 active_references = self.get_context_references()
             active_references = sorted(set(active_references + self.get_conjugates(active_references)))
             if self.args.fetch_references:
                 self.fetch_references(active_references)
                 verify_file_list += active_references
             if self.args.purge_references:
                 self.purge_references(active_references)    
         if self.args.purge_mappings:
             self.purge_mappings()
     else:
         log.error("Define --all, --contexts, --last, --range, --files, or --fetch-sqlite-db to sync.")
         sys.exit(-1)
     if self.args.check_files or self.args.check_sha1sum or self.args.repair_files:
         self.verify_files(verify_file_list)
     if self.args.verify_context_change:
         old_context = heavy_client.load_server_info(self.observatory).operational_context
     heavy_client.update_config_info(self.observatory)
     if self.args.verify_context_change:
         self.verify_context_change(old_context)
     if self.args.push_context:
         self.push_context()
     self.report_stats()
     log.standard_status()
     return log.errors()
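
The flags exercised above correspond to an invocation along these lines; the context name is a placeholder and the exact entry point may vary by install:

python -m crds.sync --contexts hst_0442.pmap --fetch-references --check-files --repair-files
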
Example #20
File: sync.py Project: nden/crds
 def verify_files(self, files):
     """Check `files` against the CRDS server database to ensure integrity and check reject status."""
     basenames = [os.path.basename(file) for file in files]
     try:
         log.verbose("Downloading verification info for", len(basenames), "files.", verbosity=10)
         infos = api.get_file_info_map(observatory=self.observatory, files=basenames, 
                                      fields=["size","rejected","blacklisted","state","sha1sum"])
     except Exception as exc:
         log.error("Failed getting file info.  CACHE VERIFICATION FAILED.  Exception: ", repr(str(exc)))
         return
     bytes_so_far = 0
     total_bytes = api.get_total_bytes(infos)
     for nth_file, file in enumerate(files):
         bfile = os.path.basename(file)
         if infos[bfile] == "NOT FOUND":
             log.error("CRDS has no record of file", repr(bfile))
         else:
             self.verify_file(file, infos[bfile], bytes_so_far, total_bytes, nth_file, len(files))
             bytes_so_far += int(infos[bfile]["size"])
Example #21
 def print_new_files(self):
     """Print the references or mappings which are in the second (new) context and not
     the first (old) context.
     """
     if not rmap.is_mapping(self.old_file) or not rmap.is_mapping(self.new_file):
         log.error("--print-new-files really only works for mapping differences.")
         return -1
     old = rmap.get_cached_mapping(self.old_file)
     new = rmap.get_cached_mapping(self.new_file)
     old_mappings = set(old.mapping_names())
     new_mappings = set(new.mapping_names())
     old_references = set(old.reference_names())
     new_references = set(new.reference_names())
     status = 0
     for name in sorted(new_mappings - old_mappings):
         print(name)
         status = 1
     for name in sorted(new_references - old_references):
         print(name)
         status = 1
     return status
Example #22
 def validate_expansions(self, pmap):
     """Check the substitution expansion values for each instrument of `pmap`
     against the rmap-defined valid values, logging an error for any expansion
     value which is not valid for its parameter.
     """
     for instrument in pmap.selections:
         if instrument not in self:
             log.verbose("Instrument", repr(instrument), "has no substitutions.")
             continue
         imap = pmap.get_imap(instrument)
         valid_values = imap.get_valid_values_map(condition=True, remove_special=False)
         for parameter, values in self[instrument].get_expansion_values().items():
             for value in values:
                 if parameter not in valid_values or not valid_values[parameter]:
                     log.verbose("For", repr(instrument), "parameter", repr(parameter),
                                 "with value", repr(value), "is unchecked.")
                     continue
                 if value not in valid_values[parameter]:
                     log.error("For", repr(instrument), "parameter", repr(parameter),
                               "value", repr(value), "is not in", valid_values[parameter])
Example #23
File: api.py Project: nden/crds
 def download_files(self, downloads, localpaths):
     """Serial file-by-file download."""
     self.info_map = get_file_info_map(
         self.observatory, downloads, ["size", "rejected", "blacklisted", "state", "sha1sum", "instrument"])
     if config.writable_cache_or_verbose("Readonly cache, skipping download of (first 5):", repr(downloads[:5]), verbosity=70):
         bytes_so_far = 0
         total_files = len(downloads)
         total_bytes = get_total_bytes(self.info_map)
         for nth_file, name in enumerate(downloads):
             try:
                 if "NOT FOUND" in self.info_map[name]:
                     raise CrdsDownloadError("file is not known to CRDS server.")
                 bytes, path = self.catalog_file_size(name), localpaths[name]
                 log.info(file_progress("Fetching", name, path, bytes, bytes_so_far, total_bytes, nth_file, total_files))
                 self.download(name, path)
                 bytes_so_far += os.stat(path).st_size
             except Exception as exc:
                 if self.raise_exceptions:
                     raise
                 else:
                     log.error("Failure downloading file", repr(name), ":", str(exc))
         return bytes_so_far
     return 0
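
The control flow reduced to its essentials: serial fetch, running byte-count progress, and a raise-or-log error policy. A sketch under assumed names, where `fetch(name)` downloads one file and returns the local path it wrote:

import os

def download_serially(names, fetch, sizes, raise_exceptions=False):
    """Download each of `names` via `fetch`, reporting progress from the
    `sizes` map (name -> expected bytes); return total bytes downloaded."""
    total_bytes = sum(sizes.values())
    bytes_so_far = 0
    for nth, name in enumerate(names):
        try:
            print("Fetching %s (%d/%d files, %d/%d bytes)" %
                  (name, nth + 1, len(names), bytes_so_far, total_bytes))
            path = fetch(name)
            bytes_so_far += os.stat(path).st_size
        except Exception as exc:
            if raise_exceptions:
                raise
            print("Failure downloading", repr(name), ":", str(exc))
    return bytes_so_far
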
Example #24
 def print_new_files(self):
     """Print the references or mappings which are in the second (new) context and not
     the first (old) context.
     """
     if not rmap.is_mapping(self.old_file) or not rmap.is_mapping(
             self.new_file):
         log.error(
             "--print-new-files really only works for mapping differences.")
         return -1
     old = crds.get_pickled_mapping(self.old_file)  # reviewed
     new = crds.get_pickled_mapping(self.new_file)  # reviewed
     old_mappings = set(old.mapping_names())
     new_mappings = set(new.mapping_names())
     old_references = set(old.reference_names())
     new_references = set(new.reference_names())
     status = 0
     for name in sorted(new_mappings - old_mappings):
         print(name)
         status = 1
     for name in sorted(new_references - old_references):
         print(name)
         status = 1
     return status
Example #25
File: monitor.py Project: stscirij/crds
 def handle_error(self, message):
     """Generic "error" handler issues an error message from remote process and
     continues monitoring.
     """
     log.error(self.format_remote(message.data))
     return False
Example #26
File: sync.py Project: nden/crds
    def main(self):
        """Synchronize files."""

        if self.args.dry_run:
            self.args.readonly_cache = True

        if self.args.repair_files:
            self.args.check_files = True

        if self.args.output_dir:
            os.environ["CRDS_MAPPATH_SINGLE"] = self.args.output_dir
            os.environ["CRDS_REFPATH_SINGLE"] = self.args.output_dir
            os.environ["CRDS_CFGPATH_SINGLE"] = self.args.output_dir
            os.environ["CRDS_PICKLEPATH_SINGLE"] = self.args.output_dir

        if self.args.clear_pickles or self.args.ignore_cache or self.args.repair_files:
            self.clear_pickles(self.contexts)

        if self.args.organize:   # do this before syncing anything under the current mode.
            self.organize_references(self.args.organize)

        self.require_server_connection()

        if self.readonly_cache and self.args.verify_context_change:
            log.error("--readonly-cache and --verify-context-change are incompatible,  a readonly cache cannot change.")

        if self.args.files:
            self.sync_explicit_files()
            verify_file_list = self.files
        elif self.args.fetch_sqlite_db:
            self.fetch_sqlite_db()
        elif self.contexts:
            active_mappings = self.get_context_mappings()
            verify_file_list = active_mappings
            if self.args.fetch_references or self.args.purge_references:
                if self.args.dataset_files or self.args.dataset_ids:
                    active_references = self.sync_datasets()
                else:
                    active_references = self.get_context_references()
                active_references = sorted(set(active_references + self.get_conjugates(active_references)))
                if self.args.fetch_references:
                    self.fetch_references(active_references)
                    verify_file_list += active_references
                if self.args.purge_references:
                    self.purge_references(active_references)    
            if self.args.purge_mappings:
                self.purge_mappings()
        else:
            log.error("Define --all, --contexts, --last, --range, --files, or --fetch-sqlite-db to sync.")
            sys.exit(-1)

        if self.args.check_files or self.args.check_sha1sum or self.args.repair_files:
            self.verify_files(verify_file_list)
            
        if self.args.save_pickles:
            self.pickle_contexts(self.contexts)

        if self.args.verify_context_change:
            old_context = heavy_client.load_server_info(self.observatory).operational_context

        heavy_client.update_config_info(self.observatory)

        if self.args.verify_context_change:
            self.verify_context_change(old_context)

        if self.args.push_context:
            self.push_context()
            
        self.report_stats()
        log.standard_status()
        return log.errors()
Example #27
File: monitor.py Project: nden/crds
 def handle_fail(self, message):
     """Generic "fail" handler reports on remote process fatal error / failure
     and issues an error() message, then stops monitoring / exits.
     """
     log.error(self.format_remote("Processing failed:", message.data))
     return message.data["result"]
Example #28
File: locate.py Project: nden/crds
def check_naming_consistency(checked_instrument=None,
                             exhaustive_mapping_check=False):
    """Dev function to compare the properties returned by name decomposition
    to the properties determined by file contents and make sure they're the same.
    Also checks rmap membership.

    >> from crds.tests import test_config
    >> old_config = test_config.setup()
    >> check_naming_consistency("acs")
    >> check_naming_consistency("cos")
    >> check_naming_consistency("nicmos")
    >> check_naming_consistency("stis")
    >> check_naming_consistency("wfc3")
    >> check_naming_consistency("wfpc2")
    >> test_config.cleanup(old_config)
    """
    from crds import certify

    for ref in rmap.list_references("*", observatory="hst", full_path=True):
        with log.error_on_exception("Failed processing:", repr(ref)):

            _path, _observ, instrument, filekind, _serial, _ext = ref_properties_from_cdbs_path(
                ref)

            if checked_instrument is not None and instrument != checked_instrument:
                continue

            if data_file.is_geis_data(ref):
                if os.path.exists(data_file.get_conjugate(ref)):
                    continue
                else:
                    log.warning("No GEIS header for", repr(ref))

            log.verbose("Processing:", instrument, filekind, ref)

            _path2, _observ2, instrument2, filekind2, _serial2, _ext2 = ref_properties_from_header(
                ref)
            if instrument != instrument2:
                log.error("Inconsistent instruments", repr(instrument), "vs.",
                          repr(instrument2), "for", repr(ref))
            if filekind != filekind2:
                log.error("Inconsistent filekinds", repr(filekind), "vs.",
                          repr(filekind2), "for", repr(ref))

            for pmap_name in reversed(
                    sorted(rmap.list_mappings("*.pmap", observatory="hst"))):

                pmap = crds.get_cached_mapping(pmap_name)

                r = certify.find_governing_rmap(pmap_name, ref)

                if not r:
                    continue

                if r.instrument != instrument:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with name derived instrument",
                              repr(instrument), "for", repr(ref), "in",
                              repr(pmap_name))
                if r.filekind != filekind:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with name derived filekind",
                              repr(filekind), "for", repr(ref), "in",
                              repr(pmap_name))
                if r.instrument != instrument2:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with content derived instrument",
                              repr(instrument2), "for", repr(ref), "in",
                              repr(pmap_name))
                if r.filekind != filekind2:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with content derived filekind",
                              repr(filekind2), "for", repr(ref), "in",
                              repr(pmap_name))

                if not exhaustive_mapping_check:
                    break

            else:
                log.error("Orphan reference", repr(ref),
                          "not found under any context.")