Пример #1
0
def visit(observation, **kwargs):
    """
    Clean up the issue described here (multiple planes for the same photons):
    https://github.com/opencadc-metadata-curation/omm2caom2/issues/3

    When the observation has more than one plane, each plane is ranked by
    the most recent last-modified time that CADC storage reports for its
    non-preview artifacts. The plane with the newest time is kept, and every
    other ranked plane is removed from the observation. Planes for which no
    storage metadata could be found are left untouched.

    :param observation: Observation to clean up; its planes are modified in
        place.
    :param kwargs: must contain 'cadc_client', which is used to look up the
        storage metadata of each artifact. Without it nothing is changed.
    :return: the observation.
    """
    mc.check_param(observation, Observation)
    logging.info(f'Begin cleanup augmentation for '
                 f'{observation.observation_id}')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning(
            'Stopping. Need a CADC Client for cleanup augmentation.')
    elif len(observation.planes) > 1:
        # from Daniel, Sylvie - 21-05-20
        # How to figure out which plane is newer:
        # SB - I do not think that we should use the “VERSION” keyword.
        # I think we must go with the ingested date.
        #
        # Daniel Durand
        # Might be better indeed. Need to compare the SCI and the REJECT
        # file and see which one is the latest

        # Rank per plane, not per artifact: a plane with several
        # non-preview artifacts must never be compared against itself,
        # otherwise the newest plane can end up on the removal list.
        newest_by_plane = {}
        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                if OmmName.is_preview(artifact.uri):
                    continue
                meta = cadc_client.info(artifact.uri)
                if meta is None:
                    logging.warning(
                        f'Did not find {artifact.uri} in CADC storage.')
                    continue
                timestamp = mc.make_time(meta.lastmod)
                if timestamp is None:
                    # An unparseable time cannot be ordered, so it cannot
                    # contribute to the ranking.
                    logging.warning(
                        f'Could not interpret the last-modified time of '
                        f'{artifact.uri}.')
                    continue
                known = newest_by_plane.get(plane.product_id)
                if known is None or timestamp > known:
                    newest_by_plane[plane.product_id] = timestamp

        # The strict comparison means the first plane seen wins a tie.
        latest_plane_id = None
        for product_id, timestamp in newest_by_plane.items():
            if (
                latest_plane_id is None
                or timestamp > newest_by_plane[latest_plane_id]
            ):
                latest_plane_id = product_id

        for entry in newest_by_plane:
            if entry == latest_plane_id:
                continue
            logging.warning(f'Removing plane {entry} from observation '
                            f'{observation.observation_id}. There are '
                            f'duplicate photons.')
            observation.planes.pop(entry)
            _send_slack_message(entry)

    logging.info(f'Completed cleanup augmentation for '
                 f'{observation.observation_id}')
    return observation
Пример #2
0
def _update_release_date(plane, max_meta_release, headers):
    """
    Fill in missing release dates on a plane and track the latest one seen.

    If the plane has no meta release date, it is taken from the 'DATE'
    keyword, falling back to 'REL_DATE' when 'DATE' yields nothing. If the
    plane has no data release date, it is set to the meta release date.

    :param plane: plane whose meta_release / data_release may be assigned.
    :param max_meta_release: the latest meta release value seen so far.
    :param headers: handed to _get_keyword for the keyword look-ups.
    :return: the larger of max_meta_release and the plane's meta release,
        or max_meta_release unchanged if the plane still has none.
    """
    logging.debug(f'Begin _update_release_date for {plane.product_id}')
    if plane.meta_release is None:
        # Try the keywords in priority order; stop at the first that
        # produces a usable time.
        for keyword in ('DATE', 'REL_DATE'):
            plane.meta_release = mc.make_time(_get_keyword(headers, keyword))
            if plane.meta_release is not None:
                break

    resolved = plane.meta_release
    if resolved is not None:
        max_meta_release = max(max_meta_release, resolved)
        if plane.data_release is None:
            plane.data_release = resolved
    logging.debug('End _update_release_date')
    return max_meta_release
def test_make_time():
    """Check that mc.make_time turns each sample string into the expected
    datetime."""
    cases = [
        ('2012-12-12T12:13:15', datetime(2012, 12, 12, 12, 13, 15)),
        # %b %d %H:%M
        ('Mar 12 12:12', datetime(2021, 3, 12, 12, 12)),
        # %Y-%m-%dHST%H:%M:%S
        ('2020-12-12HST12:12:12', datetime(2020, 12, 12, 22, 12, 12)),
    ]

    for text, expected in cases:
        actual = mc.make_time(text)
        assert actual is not None, 'expect a result'
        assert isinstance(actual, datetime), 'wrong result type'
        assert actual == expected, f'wrong result {actual} want {expected}'
Пример #4
0
    def get_time_box_work(self, prev_exec_time, exec_time):
        """
        Find the artifacts whose plane was released within a time box.

        The query returns the uri and lastModified of artifacts that belong
        to planes holding exactly one artifact in the configured collection,
        and whose plane dataRelease falls after prev_exec_time and at or
        before exec_time.

        :param prev_exec_time: start of the time box; passed to
            datetime.fromtimestamp, so a POSIX timestamp.
        :param exec_time: end of the time box; passed to
            datetime.fromtimestamp, so a POSIX timestamp.
        :return: a deque of dsc.StateRunnerMeta, one per result row, built
            from the file name of the artifact uri and the timestamp of its
            lastModified value.
        """

        def _to_utc_text(posix_seconds):
            # datetime format 2019-12-01T00:00:00.000000
            moment = datetime.fromtimestamp(posix_seconds, tz=timezone.utc)
            return moment.strftime(mc.ISO_8601_FORMAT)

        self._logger.debug('Entering get_time_box_work')
        prev_dt_str = _to_utc_text(prev_exec_time)
        exec_dt_str = _to_utc_text(exec_time)
        query = (
            f"SELECT A.uri, A.lastModified "
            f"FROM caom2.Observation AS O "
            f"JOIN caom2.Plane AS P ON O.obsID = P.obsID "
            f"JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
            f"WHERE P.planeID IN ( "
            f"  SELECT A.planeID "
            f"  FROM caom2.Observation AS O "
            f"  JOIN caom2.Plane AS P ON O.obsID = P.obsID "
            f"  JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
            f"  WHERE O.collection = '{self._config.collection}' "
            f"  GROUP BY A.planeID "
            f"  HAVING COUNT(A.artifactID) = 1 ) "
            f"AND P.dataRelease > '{prev_dt_str}' "
            f"AND P.dataRelease <= '{exec_dt_str}' "
            f"ORDER BY O.maxLastModified ASC "
            ""
        )
        rows = clc.query_tap_client(query, self._query_client)
        # results look like:
        # gemini:GEM/N20191202S0125.fits, ISO 8601
        return deque(
            dsc.StateRunnerMeta(
                mc.CaomName(row['uri']).file_name,
                mc.make_time(row['lastModified']).timestamp(),
            )
            for row in rows
        )
Пример #5
0
    def get_data_release_date(self, ext):
        """Use the 'DATE' keyword for the release date, if the 'RELEASE'
        keyword does not exist.

        Called to fill a blueprint value, must have a
        parameter named ext for import_module loading and execution.

        :param ext: index into self._headers of the header to read.
        :return: the 'RELEASE' header value when present. Otherwise the
            'DATE' header value, which for science observations is
            converted with mc.make_time and moved two years later. None if
            neither keyword is present.
        """
        rel_date = self._headers[ext].get('RELEASE')
        if rel_date is None:
            rel_date = self._headers[ext].get('DATE')
            intent = self.get_obs_intent(ext)
            if (
                rel_date is not None
                and intent is ObservationIntentType.SCIENCE
            ):
                # DD, SB - slack - 19-03-20 - if release is not in the header
                # observation date plus two years. This only applies to
                # science observations.
                temp = mc.make_time(rel_date)
                try:
                    rel_date = temp.replace(year=temp.year + 2)
                except ValueError:
                    # 29 February has no counterpart two years later, as
                    # that year is never a leap year. Roll forward to
                    # 1 March so the release is not earlier than the full
                    # two years.
                    # NOTE(review): confirm 1 March rather than 28 February
                    # is the preferred convention.
                    rel_date = temp.replace(
                        year=temp.year + 2, month=3, day=1
                    )
        return rel_date
Пример #6
0
def build_observation(db_content, observation, md_name):
    """
    Add a calibrated visibility plane, holding a single tar artifact, to an
    observation, building the observation first when none is supplied.

    :param db_content: query result, indexed here by column name
        ('Release date') and then by row index.
    :param observation: existing observation to extend, or None to have one
        built.
    :param md_name: name given to read_md_pk to load the override metadata;
        also passed on to the observation and position builders.
    :return: the observation, with the new plane added and its meta release
        date set from that plane.
    :raises mc.CadcException: if db_content has no release date for the
        file.
    """
    override = read_md_pk(md_name)

    fqn = override.get('fqn')
    almaca_name = AlmacaName(fname_on_disk=fqn)
    field_index = _get_index(almaca_name, db_content)
    if observation is None:
        observation = _build_obs(override, db_content, fqn, field_index,
                                 almaca_name, md_name)

    provenance = get_provenance(almaca_name)
    # TODO hard-coded
    provenance.inputs.add(
        PlaneURI('caom:ALMA/A001_X88b_X23/A001_X88b_X23-raw'))

    # HK 07-02-20
    # I'm looking at the very first entry, A002_Xb999fd_X602.SCI.J1851+0035.
    # The time bounds listed under all of the second-level planes correspond
    # to a date of Oct 20, 2016, which agrees with the observing date I pull
    # up on listobs.  But in the top level plane, the metaRelease date is
    # listed as Oct 12, 2016.  As we discussed earlier this week, it doesn't
    # make sense to have the meta data released before the observation was
    # even taken. Using the 'end time' of the observation that's already
    # pulled for a lower plane, and putting that as the metaRelease date in
    # the top level would be a good solution.  NB: since all spws are observed
    # simultaneously, you'll get the same answer for whichever of the
    # [high/low]res_spw[X] entries that you pull the information from.
    input_meta_data = read_md_pk(almaca_name.input_ms_metadata)
    meta_release = mc.to_float(input_meta_data.get('end_date'))
    # The end date is interpreted as MJD and re-expressed as an ISO string
    # so that mc.make_time can turn it into a datetime.
    meta_release = time.Time(meta_release, format='mjd')
    meta_release.format = 'isot'
    meta_release_dt = mc.make_time(meta_release.value)

    release_date = db_content['Release date'][field_index]
    if release_date is None:
        raise mc.CadcException(f'No release date for {fqn}')
    release_date = time.Time(release_date).to_datetime()

    # This is a progress message, not a failure, so it is logged at INFO.
    logging.info(
        f'Add plane {almaca_name.product_id} to {almaca_name.obs_id}')
    plane = Plane(product_id=almaca_name.product_id,
                  data_release=release_date,
                  meta_release=meta_release_dt,
                  provenance=provenance)

    plane.position = build_position(db_content, field_index, md_name)
    plane.energy = build_energy(override)
    plane.polarization = None
    plane.time = build_time(override, almaca_name)

    # HK 14-08-2019
    # dataProductType should be 'visibility'
    plane.data_product_type = DataProductType.VISIBILITY
    plane.calibration_level = CalibrationLevel.CALIBRATED

    observation.planes.add(plane)
    observation.meta_release = plane.meta_release
    # TODO hard-coded
    observation.members.add(ObservationURI('caom:ALMA/A001_X88b_X23'))

    # HK 29-07-19
    # qa/ contains images, plots, and web page status views generated
    # during the original (non-CANFAR) calibration of the raw data.
    # We may want to consider retaining these files as well, as they give
    # a more advanced user an easier way to check on data quality,
    # potential issues with calibration, etc.  I believe they come
    # packaged with the rest of the 'products' tarball on the archive,
    # so they would be obtainable even if we do not keep a copy.  These
    # files are fairly small.
    # TODO override.get('artifact_uri')
    artifact = Artifact(uri=almaca_name.uri,
                        product_type=almaca_name.intent,
                        release_type=ReleaseType.DATA,
                        content_type='application/x-tar',
                        content_length=None)
    plane.artifacts.add(artifact)
    return observation