def test_caom_name():
    """Check the name components CaomName derives from an artifact URI."""
    test_subject = mc.CaomName(uri='ad:TEST/test_obs_id.fits.gz')
    assert test_subject.file_id == 'test_obs_id'
    assert test_subject.file_name == 'test_obs_id.fits.gz'
    assert test_subject.uncomp_file_name == 'test_obs_id.fits'
    # the observation URI is assembled from a collection and an obs id
    obs_uri = mc.CaomName.make_obs_uri_from_obs_id('TEST', 'test_obs_id')
    assert obs_uri == 'caom:TEST/test_obs_id'
示例#2
0
 def get_version(entry):
     """Return the integer version embedded in a file name.

     :param entry: either a URI (recognised by the presence of a '/') or a
         bare file name. A file name looks like:
         'VLASS1.2.ql.T20t12.J092604+383000.10.2048.v2.I.iter1.image.
                        'pbcor.tt0.rms.subim.fits'
     :return: the eighth dot-separated token of the file name, with every
         'v' removed, converted by mc.to_int.
     """
     name = mc.CaomName(entry).file_name if '/' in entry else entry
     version_token = name.split('.')[7]
     return mc.to_int(version_token.replace('v', ''))
示例#3
0
 def __init__(
     self,
     obs_id=None,
     fname_on_disk=None,
     file_name=None,
     artifact_uri=None,
     entry=None,
 ):
     """Set up the naming attributes from an obs_id or from a file reference.

     :param obs_id: when provided, it is used as-is and the file-related
         attributes (_file_name, _file_id, _product_id) are set to None.
     :param fname_on_disk: file name on disk; only its base name is kept.
     :param file_name: file name; takes precedence over fname_on_disk and
         artifact_uri when obs_id is None.
     :param artifact_uri: artifact URI, used only when neither file_name
         nor fname_on_disk is given.
     :param entry: passed through to the parent initializer, and recorded
         as the only source name.
     :raises mc.CadcException: if obs_id, file_name, fname_on_disk and
         artifact_uri are all None.
     """
     if obs_id is not None:
         self.obs_id = obs_id
         self._file_name = None
         self._file_id = None
         self._product_id = None
         super().__init__(
             obs_id,
             COLLECTION,
             OmmName.OMM_NAME_PATTERN,
             entry=entry,
             scheme='ad',
         )
     else:
         # the first available source of a file name wins, in this order
         if file_name is not None:
             self._file_name = OmmName._add_extensions(file_name)
         elif fname_on_disk is not None:
             with_extensions = OmmName._add_extensions(fname_on_disk)
             self._file_name = os.path.basename(with_extensions)
         elif artifact_uri is not None:
             self._file_name = mc.CaomName(artifact_uri).file_name
         else:
             raise mc.CadcException(
                 f'Bad StorageName initialization for {obs_id}.'
             )
         self._file_id = OmmName.remove_extensions(self._file_name)
         # preview and thumbnail files share the product id of their
         # science file, so strip the preview markers
         without_thumb_marker = self._file_id.replace('_prev_256', '')
         self._product_id = without_thumb_marker.replace('_prev', '')
         derived_obs_id = OmmName.get_obs_id(self._file_name)
         super().__init__(
             derived_obs_id,
             COLLECTION,
             OmmName.OMM_NAME_PATTERN,
             fname_on_disk,
             entry=entry,
             scheme='ad',
         )
     self._source_names = [entry]
     self._destination_uris = [self.file_uri]
     self._logger = logging.getLogger(self.__class__.__name__)
     self._logger.debug(self)
示例#4
0
 def __init__(self,
              obs_id=None,
              file_name=None,
              instrument=None,
              ad_uri=None,
              entry=None):
     """Set up the naming attributes from an obs_id or from a file reference.

     :param obs_id: when provided, it is used as-is and the file-related
         attributes (_instrument, _file_id, _file_name, _suffix) are set
         to None.
     :param file_name: file name, including all extensions. Used when
         obs_id is None.
     :param instrument: value handed to md.Inst when obs_id is None.
     :param ad_uri: artifact URI; the file name is taken from it only when
         file_name is None.
     :param entry: passed through to the parent initializer when obs_id is
         None.
     """
     # set compression to an empty string so the file uri method still
     # works, since the file_name element will have all extensions,
     # including the .fz | .gz | '' to indicate compression type
     if obs_id is None:
         super().__init__(None,
                          COLLECTION,
                          CFHTName.CFHT_NAME_PATTERN,
                          file_name,
                          compression='',
                          entry=entry)
         self._instrument = md.Inst(instrument)
         if ad_uri is not None and file_name is None:
             file_name = mc.CaomName(ad_uri).file_name
         self._file_name = file_name
         self._file_id = CFHTName.remove_extensions(file_name)
         # the last character of the file id drives the obs_id choice below
         self._suffix = self._file_id[-1]
         if self._instrument in [md.Inst.MEGAPRIME, md.Inst.MEGACAM]:
             # SF - slack - 02-04-20
             # - MegaCam - the logic should be probably be 2 planes: p
             # and o for science. - all cfht exposures are sorted by EXPNUM
             # if i understand their data acquisition. b,f,d,x should be 1
             # plane observations. - my assumption is that the b,f,d,x have
             # no reason to have a processed equivalent.
             #
             # NOTE(review): this branch assigns self._obs_id, every other
             # branch assigns self.obs_id - confirm the difference is
             # intentional.
             if (self._suffix in ['b', 'd', 'f', 'x']
                     or self._suffix.isnumeric()):
                 self._obs_id = self._file_id
             else:
                 self._obs_id = self._file_id[:-1]
         else:
             if self.is_simple and not self.is_master_cal:
                 self.obs_id = self._file_id[:-1]
             else:
                 self.obs_id = self._file_id
                 if self.is_derived_sitelle:
                     self.obs_id = self.obs_id.replace(self._suffix, 'p')
     else:
         super().__init__(obs_id,
                          COLLECTION,
                          CFHTName.CFHT_NAME_PATTERN,
                          compression='')
         self.obs_id = obs_id
         self._instrument = None
         self._file_id = None
         self._file_name = None
         self._suffix = None
示例#5
0
    def get_time_box_work(self, prev_exec_time, exec_time):
        """
        Query for the artifacts of planes whose data release falls within
        the time box, restricted to planes of the configured collection
        that have exactly one artifact.

        :param prev_exec_time timestamp for the start of the chunk
            (exclusive), as accepted by datetime.fromtimestamp
        :param exec_time timestamp for the end of the chunk (inclusive),
            as accepted by datetime.fromtimestamp
        :return: a deque of dsc.StateRunnerMeta, one per query row, built
            from the file name of the artifact URI and the timestamp of
            the artifact's lastModified value.
        """

        self._logger.debug('Entering get_time_box_work')

        def _as_utc_string(timestamp):
            # datetime format 2019-12-01T00:00:00.000000
            as_utc = datetime.fromtimestamp(timestamp, tz=timezone.utc)
            return as_utc.strftime(mc.ISO_8601_FORMAT)

        lower_bound = _as_utc_string(prev_exec_time)
        upper_bound = _as_utc_string(exec_time)
        query = (
            "SELECT A.uri, A.lastModified "
            "FROM caom2.Observation AS O "
            "JOIN caom2.Plane AS P ON O.obsID = P.obsID "
            "JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
            "WHERE P.planeID IN ( "
            "  SELECT A.planeID "
            "  FROM caom2.Observation AS O "
            "  JOIN caom2.Plane AS P ON O.obsID = P.obsID "
            "  JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
            f"  WHERE O.collection = '{self._config.collection}' "
            "  GROUP BY A.planeID "
            "  HAVING COUNT(A.artifactID) = 1 ) "
            f"AND P.dataRelease > '{lower_bound}' "
            f"AND P.dataRelease <= '{upper_bound}' "
            "ORDER BY O.maxLastModified ASC "
        )
        rows = clc.query_tap_client(query, self._query_client)
        # rows look like:
        # gemini:GEM/N20191202S0125.fits, ISO 8601
        return deque(
            dsc.StateRunnerMeta(
                mc.CaomName(row['uri']).file_name,
                mc.make_time(row['lastModified']).timestamp(),
            )
            for row in rows
        )
def visit(observation, **kwargs):
    """
    For every plane whose data release date is past, attempt to retrieve
    the file of each non-preview artifact if it is not already at CADC.

    :param observation: the Observation to visit.
    :param kwargs: 'working_directory' (defaults to './'), 'cadc_client',
        'stream' and 'observable'.
    :return: None when there is no cadc_client, otherwise a dict with the
        single key 'observation'.
    :raises mc.CadcException: when 'stream' or 'observable' is missing.
    """
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning('Need a cadc_client to update. Stopping pull visitor.')
        return None
    stream = kwargs.get('stream')
    if stream is None:
        raise mc.CadcException('Visitor needs a stream parameter.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')

    # never incremented below, so the reported count is always 0
    count = 0
    if not observable.rejected.is_bad_metadata(observation.observation_id):
        for plane in observation.planes.values():
            still_proprietary = (
                plane.data_release is None
                or plane.data_release > datetime.utcnow()
            )
            if still_proprietary:
                logging.error(f'Plane {plane.product_id} is proprietary '
                              f'until {plane.data_release}. No file access.')
                continue

            for artifact in plane.artifacts.values():
                if gem_name.GemName.is_preview(artifact.uri):
                    continue
                try:
                    f_name = mc.CaomName(artifact.uri).file_name
                    file_url = f'{FILE_URL}/{f_name}'
                    mc.look_pull_and_put(
                        f_name,
                        working_dir,
                        file_url,
                        gem_name.ARCHIVE,
                        stream,
                        MIME_TYPE,
                        cadc_client,
                        artifact.content_checksum.checksum,
                        observable.metrics,
                    )
                except Exception as e:
                    # failures that get recorded as rejections are not
                    # propagated, everything else is
                    recorded = observable.rejected.check_and_record(
                        str(e), observation.observation_id)
                    if not recorded:
                        raise e
    else:
        logging.info(f'Stopping visit for {observation.observation_id} '
                     f'because of bad metadata.')
    logging.info(f'Completed pull visitor for {observation.observation_id}.')
    return {'observation': count}
def _do_prev(artifact, plane, working_dir, cadc_client, stream, observable):
    """Generate, store, and record the preview and thumbnail for an artifact.

    The science file named by the artifact URI is read from working_dir,
    the two plots are written to working_dir, stored with _store_smalls,
    and added to the plane with _augment.

    :return: 2, always.
    """
    science_f_name = mc.CaomName(artifact.uri).file_name
    neoss_name = NEOSSatName(file_name=science_f_name)
    preview = neoss_name.prev
    thumb = neoss_name.thumb
    preview_fqn = os.path.join(working_dir, preview)
    thumb_fqn = os.path.join(working_dir, thumb)
    science_fqn = os.path.join(working_dir, science_f_name)

    # only the first extension of the science file is plotted
    image_data = fits.getdata(science_fqn, ext=0)
    image_header = fits.getheader(science_fqn, ext=0)

    for plot_fqn, plot_size in ((preview_fqn, 1024), (thumb_fqn, 256)):
        _generate_plot(plot_fqn, plot_size, image_data, image_header)

    prev_uri = neoss_name.prev_uri
    thumb_uri = neoss_name.thumb_uri
    _store_smalls(
        cadc_client, working_dir, preview, thumb, observable.metrics, stream
    )
    _augment(plane, prev_uri, preview_fqn, ProductType.PREVIEW)
    _augment(plane, thumb_uri, thumb_fqn, ProductType.THUMBNAIL)
    return 2
示例#8
0
def visit(observation, **kwargs):
    """
    If the observation says the data release date is past, attempt to
    retrieve the fits file if it is not already at CADC.

    Only the artifact whose file name matches storage_name.file_name is
    considered, and '.jpg' files are never retrieved.

    :param observation: the Observation to visit.
    :param kwargs: 'working_directory' (defaults to './'), 'clients',
        'observable', 'metadata_reader' and 'storage_name'.
    :return: None when there are no clients, otherwise the observation.
    :raises mc.CadcException: when 'observable', 'metadata_reader' or
        'storage_name' is missing.
    """
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    clients = kwargs.get('clients')
    if clients is None:
        logging.warning('Need clients to update. Stopping pull visitor.')
        return None
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')
    metadata_reader = kwargs.get('metadata_reader')
    if metadata_reader is None:
        raise mc.CadcException('Visitor needs a metadata_reader parameter.')
    storage_name = kwargs.get('storage_name')
    if storage_name is None:
        raise mc.CadcException('Visitor needs a storage_name parameter.')

    if observable.rejected.is_bad_metadata(observation.observation_id):
        logging.info(f'Stopping visit for {observation.observation_id} '
                     f'because of bad metadata.')
    else:
        for plane in observation.planes.values():
            if (plane.data_release is None
                    or plane.data_release > datetime.utcnow()):
                logging.info(
                    f'Plane {plane.product_id} is proprietary. No file '
                    f'access.')
                continue

            for artifact in plane.artifacts.values():
                # compare file names, because part of this visitor is to
                # change the URIs
                artifact_f_name = artifact.uri.split('/')[-1]
                if artifact_f_name != storage_name.file_name:
                    logging.debug(
                        f'Leave {artifact.uri}, want {storage_name.file_uri}')
                    continue
                try:
                    f_name = mc.CaomName(artifact.uri).file_name
                    if '.jpg' not in f_name:
                        logging.debug(f'Checking for {f_name}')
                        file_url = f'{FILE_URL}/{f_name}'
                        fqn = os.path.join(working_dir, f_name)

                        # want to compare the checksum from the JSON, and the
                        # checksum at CADC storage - if they are not the same,
                        # retrieve the file from archive.gemini.edu again
                        json_md5sum = metadata_reader.file_info.get(
                            artifact.uri).md5sum
                        look_pull_and_put(artifact.uri, fqn, file_url, clients,
                                          json_md5sum)
                        if os.path.exists(fqn):
                            logging.info(
                                f'Removing local copy of {f_name} after '
                                f'successful storage call.')
                            os.unlink(fqn)
                except Exception as e:
                    # failures that get recorded as rejections are not
                    # propagated, everything else is
                    if not (observable.rejected.check_and_record(
                            str(e), observation.observation_id)):
                        raise e
    logging.info(f'Completed pull visitor for {observation.observation_id}.')
    return observation
示例#9
0
def get_proposal_id(uri):
    """Return the first two dot-separated tokens of the file name in uri,
    joined by a '.'."""
    name_parts = mc.CaomName(uri).file_name.split('.')
    return f'{name_parts[0]}.{name_parts[1]}'
            pass
        print(f'::: create observation {collection} {obs_id}')
        caom_client.create(actual_obs)
        print(f'::: read observation from sc2repo')
        obs_from_service = caom_client.read(collection, obs_id)
        mc.write_obs_to_file(obs_from_service, round_trip_fqn)
        try:
            msg = mc.compare_observations(round_trip_fqn, expected_fqn)
            print(msg)
        except Exception as e:
            print(f'comparison of {round_trip_fqn} and {expected_fqn} failed')
            print(e)
        for plane in obs_from_service.planes.values():
            for artifact in plane.artifacts.values():
                if '.fits' in artifact.uri:
                    f_name = mc.CaomName(uri=artifact.uri).file_name
                    todo_list.append(f_name)

# Check that no clean up occurred, because this was supposed to be
# a SCRAPE + MODIFY configuration, where cleaning up doesn't make
# sense.
if question(collection):
    for ii in ['/data/failure', '/data/success']:
        # any entry at all in either directory fails the check
        listing = os.listdir(ii)
        if len(listing) > 0:
            assert False, f'Bad cleanup. There should be no files in {ii}.'

print('::: update the config for ingest')

config = mc.Config()
config.get_executors()
示例#11
0
def get_target_name(uri):
    """Return the part of the file name in uri that precedes the first
    '.'."""
    file_name = mc.CaomName(uri).file_name
    name_parts = file_name.split('.')
    return name_parts[0]
示例#12
0
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes (an n:n
    relationship between TDM attributes and CAOM attributes). Must have this
    signature for import_module loading and execution.

    Only the plane whose product_id matches the one derived from 'fqn' (or,
    failing that, from 'uri') is modified.

    :param observation A CAOM Observation model instance.
    :param **kwargs 'headers', plus at least one of 'fqn' and 'uri'. When
        both are given, 'fqn' is the one that is used.
    :return: the observation.
    :raises mc.CadcException: when neither 'fqn' nor 'uri' is given."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)

    headers = kwargs.get('headers')
    fqn = kwargs.get('fqn')
    uri = kwargs.get('uri')

    # ok not to use builder here, since the obs_id value is never used later
    gem_proc_name = None
    if uri is not None:
        f_name = mc.CaomName(uri).file_name
        gem_proc_name = builder.GemProcName(entry=f_name)
    if fqn is not None:
        gem_proc_name = builder.GemProcName(entry=fqn)
    if gem_proc_name is None:
        raise mc.CadcException(f'Need one of fqn or uri defined for '
                               f'{observation.observation_id}')

    for plane in observation.planes.values():
        if plane.product_id != gem_proc_name.product_id:
            continue

        for artifact in plane.artifacts.values():
            for part in artifact.parts.values():
                # the part name is the index of its header
                idx = mc.to_int(part.name)
                header = headers[idx]
                extname = header.get('EXTNAME')
                # DB 22-07-20
                # There are a few other EXTNAME values to look at for
                # part.ProductType.   MDF values would be 'AUXILIARY'.  The
                # ones currently called "CAL" are likely best set to 'INFO'
                # since it contains info about datasets used to produce the
                # product.
                #
                # DB 07-08-20
                # EXTNAME  in ('DQ', 'VAR') should both have
                # ProductType.NOISE.   'CAL' should no longer exist - it's now
                # BPM. Default type is 'AUXILIARY', 'SCI' is type 'SCIENCE'
                if extname == 'SCI':
                    part.product_type = ProductType.SCIENCE
                elif extname in ('DQ', 'VAR'):
                    part.product_type = ProductType.NOISE
                else:
                    part.product_type = ProductType.AUXILIARY

                keeps_wcs = part.product_type in (
                    ProductType.SCIENCE,
                    ProductType.INFO,
                )
                if not keeps_wcs:
                    # DB 21-07-20
                    # ignore WCS information unless product type == SCIENCE
                    while len(part.chunks) > 0:
                        del part.chunks[-1]
                    continue

                for chunk in part.chunks:
                    filter_name = headers[0].get('FILTER').split('_')[0]
                    _update_energy(
                        chunk,
                        headers[idx],
                        filter_name,
                        observation.observation_id,
                    )
                    _update_time(
                        part, chunk, headers[0], observation.observation_id
                    )
                    if part.product_type == ProductType.SCIENCE:
                        _update_spatial_wcs(
                            part,
                            chunk,
                            headers,
                            observation.observation_id,
                        )
                        chunk.naxis = header.get('NAXIS')
                        # no naxis without a position
                        if chunk.position is None and chunk.naxis is not None:
                            chunk.naxis = None

                    time_axis = None
                    if chunk.time is not None:
                        time_axis = chunk.time.axis
                    time_function = None
                    if time_axis is not None:
                        time_function = time_axis.function
                    if time_function is not None and time_function.delta == 1.0:
                        # these are the default values, and they make
                        # the time range start in 1858
                        chunk.time = None

    proposal = observation.proposal
    if (proposal is not None
            and proposal.id is not None
            and proposal.pi_name is None):
        program = program_metadata.get_pi_metadata(proposal.id)
        if program is not None:
            proposal.pi_name = program.get('pi_name')
            proposal.title = program.get('title')

    if isinstance(observation, SimpleObservation):
        # undo the observation-level metadata modifications for updated
        # Gemini records
        observation.algorithm = Algorithm(name='exposure')
    else:
        cc.update_observation_members(observation)
    logging.debug('Done update.')
    return observation
示例#13
0
 def get_proposal_id(self, ext):
     """Return the first two dot-separated tokens of the storage name's
     file name, joined by a '.'.

     :param ext: not used.
     """
     file_name = mc.CaomName(self._storage_name.file_uri).file_name
     name_parts = file_name.split('.')
     return '.'.join((name_parts[0], name_parts[1]))
示例#14
0
def test_omm_name_dots():
    """Check that dots inside the name do not truncate the file id."""
    expected_file_id = 'C121121_J024345.57-021326.4_K_SCIRED'
    uri_under_test = f'ad:OMM/{expected_file_id}.fits.gz'
    actual_file_id = mc.CaomName(uri_under_test).file_id
    assert expected_file_id == actual_file_id, 'dots messing with things'