Example no. 1
def sync_datasets(self):
    """Sync mappings and references for datasets with respect to `self.contexts`."""
    if not self.contexts:
        log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.")
        sys.exit(-1)
    active_references = []
    for context in self.contexts:
        if self.args.dataset_ids:
            # A single "@filename" argument means: read the dataset IDs from
            # that file, one ID per line.
            if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                with open(self.args.dataset_ids[0][1:]) as pfile:
                    self.args.dataset_ids = pfile.read().splitlines()
            with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
        for dataset in self.args.dataset_files or self.args.dataset_ids:
            log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
            with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                if self.args.dataset_files:
                    headers = {dataset: data_file.get_conditioned_header(dataset, observatory=self.observatory)}
                else:
                    headers = {dataset_id: header for (dataset_id, header) in id_headers.items()
                               if dataset.upper() in dataset_id}
                for assc_dataset, header in headers.items():
                    with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset),
                                                "under context", repr(context)):
                        bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory,
                                                           ignore_cache=self.args.ignore_cache)
                        log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                        active_references.extend(bestrefs.values())
    # Drop lookup failures; getrecommendations reports them as "NOT FOUND ..." values.
    active_references = [ref for ref in active_references if not ref.startswith("NOT FOUND")]
    log.verbose("Syncing references:", repr(active_references))
    return list(set(active_references))
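
At its core this example delegates to crds.getrecommendations, which maps a dictionary of matching parameters onto best-reference file names under a given context. A minimal standalone sketch; the header keywords and context name below are illustrative placeholders, not values taken from the example above:

import crds

# Hypothetical HST-style matching parameters; a real lookup needs every
# parameter required by the instrument's rmaps under the chosen context.
header = {
    "INSTRUME": "ACS",
    "DETECTOR": "HRC",
    "DATE-OBS": "2009-05-13",
    "TIME-OBS": "00:00:00",
}
bestrefs = crds.getrecommendations(header, context="hst_0442.pmap", observatory="hst")
# Returns a dict of reference type -> file name; unmatched types come back as
# values starting with "NOT FOUND", which the method above filters out.
print(bestrefs)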
Example no. 2
def get_reffiles(parameter_dict, reffile_types, download=True):
    """Determine CRDS's best reference files to use for a particular
    observation, and download them if they are not already present in
    the ``CRDS_PATH``. The determination is made based on the
    information in ``parameter_dict``.

    Parameters
    ----------
    parameter_dict : dict
        Dictionary of basic metadata from the file to be processed by
        the returned reference files (e.g. ``INSTRUME``, ``DETECTOR``,
        etc)

    reffile_types : list
        List of reference file types to look up and download. These must
        be contained in CRDS's list of reference file types.

    download : bool
        If ``True`` (default), the identified best reference files will
        be downloaded. If ``False``, the dictionary of best reference
        files will still be returned, but the files will not be
        downloaded. The use of ``False`` is primarily intended to
        support testing on Travis.

    Returns
    -------
    reffile_mapping : dict
        Mapping of reference file type to the local CRDS path of the
        best reference file
    """

    # IMPORTANT: Import of the crds package must be done AFTER the environment
    # variables are set in the functions above
    import os  # used below to build local paths under CRDS_PATH
    import crds
    from crds import CrdsLookupError

    if download:
        try:
            reffile_mapping = crds.getreferences(parameter_dict, reftypes=reffile_types)
        except CrdsLookupError as err:
            raise ValueError("ERROR: CrdsLookupError when trying to find reference files "
                             "for parameters: {}".format(parameter_dict)) from err
    else:
        # If the files will not be downloaded, still return the same local
        # paths that are returned when the files are downloaded. Note that
        # this follows the directory structure currently assumed by CRDS.
        crds_path = os.environ.get('CRDS_PATH')
        try:
            reffile_mapping = crds.getrecommendations(parameter_dict, reftypes=reffile_types)
        except CrdsLookupError as err:
            raise ValueError("ERROR: CrdsLookupError when trying to find reference files "
                             "for parameters: {}".format(parameter_dict)) from err

        for key, value in reffile_mapping.items():
            # Check for NOT FOUND must be done here because the following
            # line will raise an exception if NOT FOUND is present
            if "NOT FOUND" in value:
                reffile_mapping[key] = "NOT FOUND"
            else:
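                # JWST reference file names follow the pattern
                # jwst_<instrument>_<reftype>_<version>.fits, so the second
                # underscore-delimited field is the instrument name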
                instrument = value.split('_')[1]
                reffile_mapping[key] = os.path.join(crds_path, 'references/jwst', instrument, value)

    return reffile_mapping
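
A hedged usage sketch for get_reffiles, assuming CRDS_PATH and CRDS_SERVER_URL are already set in the environment; the keyword values below are placeholders for a JWST observation, and the exact keys required depend on the reference file types requested:

params = {
    "INSTRUME": "NIRCAM",
    "DETECTOR": "NRCA1",
    "FILTER": "F200W",
    "PUPIL": "CLEAR",
    "DATE-OBS": "2021-10-25",
    "TIME-OBS": "12:00:00",
}
# With download=False the files are not fetched; the returned paths follow the
# CRDS_PATH/references/jwst/<instrument>/ layout assumed by the function above.
reffile_mapping = get_reffiles(params, ["gain", "distortion"], download=False)
print(reffile_mapping)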
Example no. 3
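Imports assumed by this snippet (get_streams, load_input_file, check_meta, and ref_matches are helpers defined elsewhere in the source module; ref_matches presumably wraps crds.matches):

import os

from astropy.io import fits
from astropy.time import Time
from crds import getrecommendations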
def reffile_test(path_to_input_file, pipeline_step, logfile=None,
                 input_file=None):
    """
    This is a new version of reffile_test which uses crds.matches instead of
    working with the reference file metadata directly. That way, if the rmap
    was updated manually on CRDS (to avoid redelivering files for a minor
    keyword change), this will test the actual match criteria.
    """
    log_msgs = []

    logstream, errstream = get_streams(logfile=logfile)
    
    # Convert pipeline step to a header keyword if necessary
    if pipeline_step.upper().startswith("R_"):
        step_key = pipeline_step.upper()
    else:
        # FITS keywords are at most 8 characters: "R_" plus up to 6 characters
        # of the step name (slicing tolerates shorter names)
        step_key = "R_" + pipeline_step.upper()[:6]
    
    # Identify the context
    context = fits.getval(path_to_input_file, "CRDS_CTX")
    
    # Identify the reference file
    try:
        reffile_name = fits.getval(path_to_input_file, step_key)
    except KeyError:
        print("Invalid pipeline step", file=errstream)
        log_msgs.append("Invalid pipeline step")
        return None
    
    reffile_name = reffile_name.replace('crds://', '')
    
    # Is there a reference file for this step? If not, PASS
    if reffile_name == "N/A":
        print("No reference file for step {}.".format(pipeline_step), file=errstream)
        log_msgs.append("No reference file for step {}.".format(pipeline_step))
        return ""
    
    # Grab metadata from the input and reference files
    if input_file is None:
        input_file = load_input_file(path_to_input_file, logstream=logstream)
    print("Grabbing CRDS match criteria...", file=logstream)
    log_msgs.append("Grabbing CRDS match criteria...")
    try:
        match_criteria = ref_matches(context, reffile_name)[0]
    except ValueError:
        msg = "Unable to determine match criteria for {} under context {}".format(reffile_name, context)
        print(msg, file=errstream)
        log_msgs.append(msg)
        return None
    
    tests = {}  # store all the tests in a single dictionary

    # add the instrument name under the expected keyword
    match_criteria['META.INSTRUMENT.NAME'] = 'NIRSPEC'

    # make sure that the subarray keyword is correct for the size of the data
    #subarray = get_subarray(path_to_input_file)
    subarray = fits.getval(path_to_input_file, 'SUBARRAY', 0)
    match_criteria['META.SUBARRAY.NAME'] = subarray

    # Test whether the recommended reference file was actually selected
    recommended_reffile = getrecommendations(match_criteria,
                                             reftypes=[pipeline_step],
                                             context=context,
                                             fast=True)

    if isinstance(recommended_reffile, str):
        # remove path, only want to test the file name
        recommended_reffile = os.path.basename(recommended_reffile)
        tests['RECOMMENDATION'] = recommended_reffile == reffile_name
    else:
        msg1 = '* WARNING: Unable to find recommendation for the reference file:'
        msg2 = '        Match criteria determined by pipeline to find reference file: ' + repr(match_criteria)
        msg3 = '        Recommendation dictionary = ' + repr(recommended_reffile)
        for msg in (msg1, msg2, msg3):
            log_msgs.append(msg)
            print(msg)

    # Remove irrelevant match criteria
    del match_criteria['observatory']
    del match_criteria['instrument']
    del match_criteria['filekind']
    
    # Useafter dates require special handling
    if "META.OBSERVATION.DATE" not in match_criteria:
        tests['USEAFTER'] = True
    else:
        input_date = input_file.meta.observation.date
        input_time = input_file.meta.observation.time
        input_obstime = Time(input_date + "T" + input_time)
        ref_date = match_criteria.pop("META.OBSERVATION.DATE")
        ref_time = match_criteria.pop("META.OBSERVATION.TIME")
        ref_useafter = Time(ref_date + "T" + ref_time)
        tests["USEAFTER"] = input_obstime >= ref_useafter
        # Note that this does NOT check whether there is a more recent
        # (but still valid) reference file that could have been selected
    
    # Loop over the rest of the matching criteria
    for criterion, value in match_criteria.items():
        tests[criterion] = check_meta(input_file, criterion, value)
    
    # None counts as N/A (pass); only an explicit False marks a failed test
    final = all(x or x is None for x in tests.values())
    
    failures = []
    failmsg = "{}: reffile value {}, input value {}"
    
    # Finally, print out the results of the tests
    summary = ["REFERENCE FILE SELECTION TEST",
               "  Input file: {}".format(path_to_input_file),
               "  Pipeline step: {}".format(pipeline_step),
               "  Header keyword: {}".format(step_key),
               "  Reference file selected: {}".format(reffile_name),
               "  **Metadata tests performed:**"]
    for line in summary:
        print(line, file=logstream)
        log_msgs.append(line)
    rescode = {None: "N/A", True: "PASS", False: "FAIL"}
    for meta in sorted(tests):
        result = tests[meta]
        print("    {}: {}".format(meta, rescode[result]), file=logstream)
        if rescode[result] == "FAIL":
            if meta == "USEAFTER":
                ival = input_obstime
                rval = ref_useafter
            else:
                ival = input_file[meta.lower()]
                rval = match_criteria[meta]
            failures.append(failmsg.format(meta, rval, ival))
            print("      Input file value: {}".format(ival), file=logstream)
            print("      Reference file value: {}".format(rval), file=logstream)
            log_msgs.append("      Input file value: {}".format(ival))
            log_msgs.append("      Reference file value: {}".format(rval))

    print("  Final result: {}".format(rescode[final]), file=logstream)
    log_msgs.append("  Final result: {}".format(rescode[final]))

    # Close the output stream if necessary
    if logfile is not None:
        logstream.close()
    
    return "\n".join(failures), log_msgs