def testTimescales(self):
    """Test that time scale conversion occurs on comparison."""
    ts1 = Timespan(begin=astropy.time.Time('2013-06-17 13:34:45.775000', scale='tai', format='iso'),
                   end=astropy.time.Time('2013-06-17 13:35:17.947000', scale='tai', format='iso'))
    ts2 = Timespan(begin=astropy.time.Time('2013-06-17T13:34:10.775', scale='utc', format='isot'),
                   end=astropy.time.Time('2013-06-17T13:34:42.947', scale='utc', format='isot'))
    self.assertEqual(ts1, ts2, f"Compare {ts1} with {ts2}")
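# Why those two timespans are equal: TAI led UTC by 35 s in June 2013 (after
# the 2012-07-01 leap second), so the UTC endpoints above name the same
# instants as the TAI ones.  A standalone check of that offset with astropy
# (an illustrative sketch, not part of the test suite):
#
#     t_tai = astropy.time.Time('2013-06-17 13:34:45.775000', scale='tai', format='iso')
#     t_utc = astropy.time.Time('2013-06-17T13:34:10.775', scale='utc', format='isot')
#     assert abs((t_tai - t_utc).sec) < 1e-6  # same instant in both scales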
def testFuture(self):
    """Check that we do not get warnings from future dates."""
    # Astropy will give "dubious year" warnings for UTC dates five years in
    # the future, so hide these expected warnings from the test output.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
        if erfa is not None:
            warnings.simplefilter("ignore", category=erfa.ErfaWarning)
        ts1 = Timespan(begin=astropy.time.Time(self.timestamps[0], scale='utc', format='iso'),
                       end=astropy.time.Time('2099-06-17 13:35:17.947000', scale='utc', format='iso'))
        ts2 = Timespan(begin=astropy.time.Time(self.timestamps[0], scale='utc', format='iso'),
                       end=astropy.time.Time('2099-06-17 13:35:17.947000', scale='utc', format='iso'))

    # unittest can't test for the absence of warnings, so we run the
    # comparison, trigger our own warning, and check that it is the only
    # warning issued.
    with self.assertWarns(Warning) as cm:
        self.assertEqual(ts1, ts2)
        warnings.warn("deliberate")
    self.assertEqual(str(cm.warning), "deliberate")
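# An illustrative sketch (not part of the test suite) of the warning this
# test guards against: converting a far-future UTC time to TAI requires ERFA
# to extrapolate the leap-second table, which emits a "dubious year"
# ErfaWarning unless suppressed as above.
#
#     with warnings.catch_warnings(record=True) as caught:
#         warnings.simplefilter("always")
#         astropy.time.Time('2099-06-17 13:35:17.947000', scale='utc', format='iso').tai
#     assert any("dubious year" in str(w.message) for w in caught)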
def setUp(self):
    start = astropy.time.Time('2020-01-01T00:00:00', format="isot", scale="tai")
    offset = astropy.time.TimeDelta(60, format="sec")
    self.timestamps = [start + offset*n for n in range(3)]
    self.timespans = [Timespan(begin=None, end=None)]
    self.timespans.extend(Timespan(begin=None, end=t) for t in self.timestamps)
    self.timespans.extend(Timespan(begin=t, end=None) for t in self.timestamps)
    self.timespans.extend(Timespan(begin=t, end=t) for t in self.timestamps)
    self.timespans.extend(Timespan(begin=a, end=b)
                          for a, b in itertools.combinations(self.timestamps, 2))
def test_RangeTimespanType(self):
    start = astropy.time.Time('2020-01-01T00:00:00', format="isot", scale="tai")
    offset = astropy.time.TimeDelta(60, format="sec")
    timestamps = [start + offset*n for n in range(3)]
    timespans = [Timespan(begin=None, end=None)]
    timespans.extend(Timespan(begin=None, end=t) for t in timestamps)
    timespans.extend(Timespan(begin=t, end=None) for t in timestamps)
    timespans.extend(Timespan(begin=a, end=b) for a, b in itertools.combinations(timestamps, 2))
    db = self.makeEmptyDatabase(origin=1)
    with db.declareStaticTables(create=True) as context:
        tbl = context.addTable(
            "tbl",
            ddl.TableSpec(
                fields=[
                    ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True),
                    ddl.FieldSpec(name="timespan", dtype=_RangeTimespanType),
                ],
            )
        )
    rows = [{"id": n, "timespan": t} for n, t in enumerate(timespans)]
    db.insert(tbl, *rows)

    # Test basic round-trip through database.
    self.assertEqual(
        rows,
        [dict(row) for row in db.query(tbl.select().order_by(tbl.columns.id)).fetchall()]
    )

    # Test that Timespan's Python methods are consistent with our use of
    # half-open ranges and PostgreSQL operators on ranges.
    def subquery(alias: str) -> sqlalchemy.sql.FromClause:
        return sqlalchemy.sql.select(
            [tbl.columns.id.label("id"), tbl.columns.timespan.label("timespan")]
        ).select_from(
            tbl
        ).alias(alias)

    sq1 = subquery("sq1")
    sq2 = subquery("sq2")
    query = sqlalchemy.sql.select([
        sq1.columns.id.label("n1"),
        sq2.columns.id.label("n2"),
        sq1.columns.timespan.overlaps(sq2.columns.timespan).label("overlaps"),
    ])
    dbResults = {
        (row[query.columns.n1], row[query.columns.n2]): row[query.columns.overlaps]
        for row in db.query(query)
    }
    pyResults = {
        (n1, n2): t1.overlaps(t2)
        for (n1, t1), (n2, t2) in itertools.product(enumerate(timespans), enumerate(timespans))
    }
    self.assertEqual(pyResults, dbResults)
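# The "overlaps" semantics cross-checked above rely on timespans being
# half-open: a Timespan contains its begin but not its end, so back-to-back
# timespans share only a boundary and do not overlap.  A minimal sketch using
# the timestamps built at the top of the test:
#
#     a, b, c = timestamps
#     assert not Timespan(a, b).overlaps(Timespan(b, c))  # adjacent, disjoint
#     assert Timespan(a, c).overlaps(Timespan(b, c))      # genuine overlap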
def testPrecision(self):
    """Test that we only use nanosecond precision for equality."""
    ts1 = self.timespans[-1]
    ts2 = Timespan(begin=ts1.begin + astropy.time.TimeDelta(1e-10, format="sec"), end=ts1.end)
    self.assertEqual(ts1, ts2)

    self.assertEqual(Timespan(begin=None, end=None), Timespan(begin=None, end=None))
    self.assertEqual(Timespan(begin=None, end=ts1.end), Timespan(begin=None, end=ts1.end))

    ts2 = Timespan(begin=ts1.begin + astropy.time.TimeDelta(1e-8, format="sec"), end=ts1.end)
    self.assertNotEqual(ts1, ts2)

    ts2 = Timespan(begin=None, end=ts1.end)
    self.assertNotEqual(ts1, ts2)

    t1 = Timespan(begin=astropy.time.Time(2456461.0, val2=0.06580758101851847, format="jd", scale="tai"),
                  end=astropy.time.Time(2456461.0, val2=0.06617994212962963, format="jd", scale="tai"))
    t2 = Timespan(begin=astropy.time.Time(2456461.0, val2=0.06580758101851858, format="jd", scale="tai"),
                  end=astropy.time.Time(2456461.0, val2=0.06617994212962963, format="jd", scale="tai"))
    self.assertEqual(t1, t2)

    # Ensure that == and != work properly.
    self.assertTrue(t1 == t2, f"Equality of {t1} and {t2}")
    self.assertFalse(t1 != t2, f"Check != is false for {t1} and {t2}")
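# The thresholds above follow from Timespan storing endpoints as integer TAI
# nanoseconds: a 1e-10 s (0.1 ns) shift disappears in the rounding, while a
# 1e-8 s (10 ns) shift survives it.  Likewise the two JD fractions differ by
# about 1e-16 day (roughly 10 picoseconds), far below nanosecond resolution,
# so t1 == t2.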
def testInvalid(self):
    """Test that we reject timespans that should not exist."""
    with self.assertRaises(ValueError):
        Timespan(TimeConverter().max_time, None)
    with self.assertRaises(ValueError):
        Timespan(TimeConverter().max_time, TimeConverter().max_time)
    with self.assertRaises(ValueError):
        Timespan(None, TimeConverter().epoch)
    with self.assertRaises(ValueError):
        Timespan(TimeConverter().epoch, TimeConverter().epoch)
    t = TimeConverter().nsec_to_astropy(TimeConverter().max_nsec - 1)
    with self.assertRaises(ValueError):
        Timespan(t, t)
    with self.assertRaises(ValueError):
        Timespan.fromInstant(t)
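# A note on why these are presumably rejected, given the padInstantaneous
# behavior exercised in testEmpty later in this file: an instantaneous
# timespan is padded to [t, t + 1 ns), so an instant at or adjacent to
# TimeConverter().max_time has nowhere to put its end, and a timespan ending
# at TimeConverter().epoch would close at or before the earliest
# representable time, leaving no valid interval either way.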
def testJson(self):
    """Test that a Timespan round-trips through JSON serialization."""
    ts1 = Timespan(begin=astropy.time.Time('2013-06-17 13:34:45.775000', scale='tai', format='iso'),
                   end=astropy.time.Time('2013-06-17 13:35:17.947000', scale='tai', format='iso'))
    json_str = ts1.to_json()
    ts_json = Timespan.from_json(json_str)
    self.assertEqual(ts_json, ts1)
def testFromInstant(self):
    """Test construction of instantaneous timespans."""
    self.assertEqual(Timespan.fromInstant(self.timestamps[0]),
                     Timespan(self.timestamps[0], self.timestamps[0]))
def testGetCalibration(self):
    """Test that `Butler.get` can be used to fetch from
    `~CollectionType.CALIBRATION` collections if the data ID includes
    extra dimensions with temporal information.
    """
    # Import data to play with.
    butler = self.makeButler(writeable=True)
    butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
    butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))

    # Certify some biases into a CALIBRATION collection.
    registry = butler.registry
    registry.registerCollection("calibs", CollectionType.CALIBRATION)
    t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
    t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
    t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
    bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
    bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
    bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
    bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
    registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
    registry.certify("calibs", [bias2b], Timespan(t2, None))
    registry.certify("calibs", [bias3b], Timespan(t2, t3))

    # Insert some exposure dimension data.
    registry.insertDimensionData(
        "exposure",
        {
            "instrument": "Cam1",
            "id": 3,
            "obs_id": "three",
            "timespan": Timespan(t1, t2),
            "physical_filter": "Cam1-G",
            "day_obs": 20201114,
            "seq_num": 55,
        },
        {
            "instrument": "Cam1",
            "id": 4,
            "obs_id": "four",
            "timespan": Timespan(t2, t3),
            "physical_filter": "Cam1-G",
            "day_obs": 20211114,
            "seq_num": 42,
        },
    )

    # Get some biases from raw-like data IDs.
    bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 3, "detector": 2},
                              collections="calibs")
    self.assertEqual(bias2a_id, bias2a.id)
    bias3b_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 4, "detector": 3},
                              collections="calibs")
    self.assertEqual(bias3b_id, bias3b.id)

    # Get using the kwarg form.
    bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3,
                              collections="calibs")
    self.assertEqual(bias3b_id, bias3b.id)

    # Do it again but using the record information.
    bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure.obs_id": "three",
                                       "detector.full_name": "Ab"},
                              collections="calibs")
    self.assertEqual(bias2a_id, bias2a.id)
    bias3b_id, _ = butler.get("bias", {"exposure.obs_id": "four", "detector.full_name": "Ba"},
                              collections="calibs", instrument="Cam1")
    self.assertEqual(bias3b_id, bias3b.id)

    # And again but this time using the alternate value rather than
    # the primary.
    bias3b_id, _ = butler.get("bias", {"exposure": "four", "detector": "Ba"},
                              collections="calibs", instrument="Cam1")
    self.assertEqual(bias3b_id, bias3b.id)

    # And again but this time using the alternate value rather than
    # the primary and do it in the keyword arguments.
    bias3b_id, _ = butler.get("bias", exposure="four", detector="Ba",
                              collections="calibs", instrument="Cam1")
    self.assertEqual(bias3b_id, bias3b.id)

    # Now with implied record columns.
    bias3b_id, _ = butler.get("bias", day_obs=20211114, seq_num=42, raft="B", name_in_raft="a",
                              collections="calibs", instrument="Cam1")
    self.assertEqual(bias3b_id, bias3b.id)
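# How the lookups above resolve: exposure 3 carries timespan [t1, t2), which
# lies inside the validity range certified for bias2a and bias3a, while
# exposure 4 ([t2, t3)) lies inside the ranges certified for bias2b and
# bias3b.  A sketch of the equivalent direct registry lookup, assuming
# `Registry.findDataset` accepts a validity-range `timespan` argument for
# CALIBRATION collections:
#
#     ref = registry.findDataset("bias", instrument="Cam1", detector=3,
#                                collections="calibs", timespan=Timespan(t2, t3))
#     assert ref.id == bias3b.id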
def testCollectionTransfers(self):
    """Test exporting and then importing collections of various types."""
    # Populate a registry with some datasets.
    butler1 = self.makeButler(writeable=True)
    butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
    butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
    registry1 = butler1.registry

    # Add some more collections.
    registry1.registerRun("run1")
    registry1.registerCollection("tag1", CollectionType.TAGGED)
    registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
    registry1.registerCollection("chain1", CollectionType.CHAINED)
    registry1.registerCollection("chain2", CollectionType.CHAINED)
    registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
    registry1.setCollectionChain("chain2", ["calibration1", "run1"])

    # Associate some datasets into the TAGGED and CALIBRATION collections.
    flats1 = list(registry1.queryDatasets("flat", collections=...))
    registry1.associate("tag1", flats1)
    t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
    t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
    t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
    bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
    bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
    bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
    bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
    registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
    registry1.certify("calibration1", [bias2b], Timespan(t2, None))
    registry1.certify("calibration1", [bias3b], Timespan(t2, t3))

    with tempfile.NamedTemporaryFile(mode='w', suffix=".yaml") as file:
        # Export all collections, and some datasets.
        with butler1.export(filename=file.name) as exporter:
            # Sort results to put chain1 before chain2, which is
            # intentionally not topological order.
            for collection in sorted(registry1.queryCollections()):
                exporter.saveCollection(collection)
            exporter.saveDatasets(flats1)
            exporter.saveDatasets([bias2a, bias2b, bias3a, bias3b])
        # Import them into a new registry.
        butler2 = self.makeButler(writeable=True)
        butler2.import_(filename=file.name)
    registry2 = butler2.registry

    # Check that it all round-tripped, starting with the collections
    # themselves.
    self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
    self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
    self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
    self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
    self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
    self.assertEqual(
        list(registry2.getCollectionChain("chain1")),
        ["tag1", "run1", "chain2"],
    )
    self.assertEqual(
        list(registry2.getCollectionChain("chain2")),
        ["calibration1", "run1"],
    )

    # Check that tag collection contents are the same.
    self.maxDiff = None
    self.assertCountEqual(
        [ref.unresolved() for ref in registry1.queryDatasets(..., collections="tag1")],
        [ref.unresolved() for ref in registry2.queryDatasets(..., collections="tag1")],
    )

    # Check that calibration collection contents are the same.
    self.assertCountEqual(
        [(assoc.ref.unresolved(), assoc.timespan)
         for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")],
        [(assoc.ref.unresolved(), assoc.timespan)
         for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")],
    )
def test_ingest(self):
    fitsPath = os.path.join(TESTDIR, "data", "small.fits")
    formatter = FORMATTERS[0]
    datasetTypeName, formatterCls = (formatter["dataset_type"], formatter["formatter_cls"])

    datasetType = self.butler.registry.getDatasetType(datasetTypeName)
    datasets = []
    for exposure in range(3, 5):
        for detector in range(6):
            # Use the same FITS file for every detector/exposure to
            # exercise ingest.
            if not os.path.exists(fitsPath):
                log.warning(
                    f"No data found for detector {detector}, exposure {exposure} @ {fitsPath}."
                )
                continue
            ref = DatasetRef(datasetType, dataId={
                "instrument": INSTRUMENT_NAME,
                "detector": detector,
                "exposure": exposure * 11,
            })
            datasets.append(FileDataset(refs=ref, path=fitsPath, formatter=formatterCls))

    # No need to register a new collection here:
    # run = "rawIngestedRun"
    # self.butler.registry.registerCollection(run, type=CollectionType.RUN)
    # The collection is registered as a part of setUp.
    run = self.collection
    with self.butler.transaction():
        for exposure in range(3, 5):
            expid = exposure * 11
            self.butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": INSTRUMENT_NAME,
                    "id": expid,
                    "name": f"{expid}",
                    "group_name": "day1",
                    "timespan": Timespan(begin=None, end=None),
                })
        # Transfer can be 'auto', 'move', 'copy', 'hardlink', 'relsymlink'
        # or 'symlink'.
        self.butler.ingest(*datasets, transfer="symlink", run=run)

    # Verify that 12 files were ingested (2 exposures for each of the
    # 6 detectors).
    refsSet = set(self.butler.registry.queryDatasets(datasetTypeName, collections=[run]))
    self.assertEqual(len(refsSet), 12,
                     f"Collection {run} should have 12 elements after ingest")

    # Verify that the data ID is present.
    dataid = {"exposure": 44, "detector": 5, "instrument": INSTRUMENT_NAME}
    refsList = list(self.butler.registry.queryDatasets(datasetTypeName, collections=[run],
                                                       dataId=dataid))
    self.assertEqual(len(refsList), 1,
                     f"Collection {run} should have 1 element with {dataid}")
def _buildVisitRecords(self, definition: VisitDefinitionData, *,
                       collections: Any = None) -> _VisitRecords:
    """Build the DimensionRecords associated with a visit.

    Parameters
    ----------
    definition : `VisitDefinitionData`
        Struct with identifiers for the visit and records for its
        constituent exposures.
    collections : Any, optional
        Collections to be searched for raws and camera geometry,
        overriding ``self.butler.collections``.
        Can be any of the types supported by the ``collections``
        argument to butler construction.

    Returns
    -------
    records : `_VisitRecords`
        Struct containing DimensionRecords for the visit, including
        associated dimension elements.
    """
    # Compute all regions.
    visitRegion, visitDetectorRegions = self.computeVisitRegions.compute(
        definition, collections=collections)

    # Aggregate other exposure quantities.
    timespan = Timespan(
        begin=_reduceOrNone(min, (e.timespan.begin for e in definition.exposures)),
        end=_reduceOrNone(max, (e.timespan.end for e in definition.exposures)),
    )
    exposure_time = _reduceOrNone(sum, (e.exposure_time for e in definition.exposures))
    physical_filter = _reduceOrNone(lambda a, b: a if a == b else None,
                                    (e.physical_filter for e in definition.exposures))
    target_name = _reduceOrNone(lambda a, b: a if a == b else None,
                                (e.target_name for e in definition.exposures))
    science_program = _reduceOrNone(lambda a, b: a if a == b else None,
                                    (e.science_program for e in definition.exposures))

    # The observing day for a visit is defined by the earliest observation
    # of the visit.
    observing_day = _reduceOrNone(min, (e.day_obs for e in definition.exposures))
    observation_reason = _reduceOrNone(lambda a, b: a if a == b else None,
                                       (e.observation_reason for e in definition.exposures))
    if observation_reason is None:
        # Be explicit about there being multiple reasons.
        observation_reason = "various"

    # Use the mean zenith angle as an approximation.
    zenith_angle = _reduceOrNone(sum, (e.zenith_angle for e in definition.exposures))
    if zenith_angle is not None:
        zenith_angle /= len(definition.exposures)

    # Construct the actual DimensionRecords.
    return _VisitRecords(
        visit=self.universe["visit"].RecordClass(
            instrument=definition.instrument,
            id=definition.id,
            name=definition.name,
            physical_filter=physical_filter,
            target_name=target_name,
            science_program=science_program,
            observation_reason=observation_reason,
            day_obs=observing_day,
            zenith_angle=zenith_angle,
            visit_system=self.groupExposures.getVisitSystem()[0],
            exposure_time=exposure_time,
            timespan=timespan,
            region=visitRegion,
            # TODO: no seeing value in exposure dimension records, so we
            # can't set that here.  But there are many other columns that
            # both dimensions should probably have as well.
        ),
        visit_definition=[
            self.universe["visit_definition"].RecordClass(
                instrument=definition.instrument,
                visit=definition.id,
                exposure=exposure.id,
                visit_system=self.groupExposures.getVisitSystem()[0],
            )
            for exposure in definition.exposures
        ],
        visit_detector_region=[
            self.universe["visit_detector_region"].RecordClass(
                instrument=definition.instrument,
                visit=definition.id,
                detector=detectorId,
                region=detectorRegion,
            )
            for detectorId, detectorRegion in visitDetectorRegions.items()
        ],
    )
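# A minimal sketch of the ``_reduceOrNone`` helper assumed above (the actual
# implementation lives elsewhere in this module): reduce the values with
# ``func``, but propagate `None` whenever any input is missing.
#
#     def _reduceOrNone(func, iterable):
#         result = None
#         for value in iterable:
#             if value is None:
#                 return None  # one missing value poisons the aggregate
#             result = value if result is None else func(result, value)
#         return result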
def ingestSimulated(repo, locations, regex, output_run, transfer="auto", ingest_type="rawexp"):
    """Ingest raw frames into the butler registry.

    Parameters
    ----------
    repo : `str`
        URI to the repository.
    locations : `list` [`str`]
        Files to ingest and directories to search for files that match
        ``regex`` to ingest.
    regex : `str`
        Regex string used to find files in directories listed in
        ``locations``.
    output_run : `str`
        Name of the RUN collection that datasets should be inserted into.
    transfer : `str` or `None`
        The external data transfer type; "auto" by default.
    ingest_type : `str`
        Dataset type of the ingested products.

    Raises
    ------
    Exception
        Raised if operations on the configuration object fail.

    Notes
    -----
    This method inserts all datasets for an exposure within a transaction,
    guaranteeing that partial exposures are never ingested.  The exposure
    dimension record is inserted with `Registry.syncDimensionData` first
    (in its own transaction), which inserts only if a record with the same
    primary key does not already exist.  This allows different files within
    the same exposure to be ingested in different runs.
    """
    butler = Butler(repo, writeable=True)

    # Make sure the instrument and detector dimensions are populated.
    with butler.registry.transaction():
        instrument_record = {
            "name": "simulator",
            "exposure_max": 600000,
            "detector_max": 6,
            "class_name": "spherex.instrument.SimulatorInstrument",
        }
        butler.registry.syncDimensionData("instrument", instrument_record)
        for idx in range(1, 7):
            detector_record = {
                "instrument": "simulator",
                "id": idx,
                "full_name": f"array{idx}",
            }
            butler.registry.syncDimensionData("detector", detector_record)

    dimension_universe = butler.registry.dimensions
    datasetType = DatasetType(ingest_type,
                              dimension_universe.extract(("instrument", "detector", "exposure")),
                              "SPHERExImage",
                              universe=dimension_universe)
    # Idempotent dataset type registration.
    butler.registry.registerDatasetType(datasetType)

    # Idempotent collection registration.
    run = f"{ingest_type}r" if (output_run is None) else output_run
    butler.registry.registerCollection(run, type=CollectionType.RUN)

    n_failed = 0
    files = findFileResources(locations, regex)

    # Example file names: sim_exposure_000000_array_1.fits or
    # sim_exposure_000000_array_2_dark_current.fits
    pattern = re.compile(r"sim_exposure_(\d+)_array_(\d)[_,.]")

    # Do we want to group observations?
    grp = datetime.date.today().strftime("%Y%m%d")

    datasets = []
    for file in files:
        # Parse exposure and detector ids from the file name.
        m = pattern.search(file)
        if m is None:
            n_failed += 1
            logging.error(f"{file} does not match simulator file pattern")
            continue
        g = m.groups()
        if len(g) != 2:
            n_failed += 1
            logging.error(f"Unable to get exposure and detector from file name: {file}")
            continue
        exposure_id, detector_id = map(int, g)

        try:
            exposure_record = {
                "instrument": "simulator",
                "id": exposure_id,
                "name": f"{exposure_id:06d}",
                "group_name": f"{grp}",
                "timespan": Timespan(begin=None, end=None),
            }
            # Idempotent insertion of individual dimension rows.
            butler.registry.syncDimensionData("exposure", exposure_record)
        except Exception as e:
            n_failed += 1
            logging.error(f"Unable to insert exposure record for file {file}: {e}")
            continue

        dataId = DataCoordinate.standardize(instrument="simulator",
                                            detector=detector_id,
                                            exposure=exposure_id,
                                            universe=butler.registry.dimensions)
        ref = DatasetRef(datasetType, dataId=dataId)
        datasets.append(FileDataset(refs=ref, path=file, formatter=AstropyImageFormatter))

    with butler.transaction():
        butler.ingest(*datasets, transfer=transfer, run=run)
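# A hypothetical invocation (the repository path, search location, and run
# name below are illustrative only):
#
#     ingestSimulated("/path/to/repo",
#                     locations=["/data/sims"],
#                     regex=r"sim_exposure_.*\.fits$",
#                     output_run="rawexp/run1",
#                     transfer="symlink")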
def ingestStrayLightData(self, butler, directory, *, transfer=None, collection=None, labels=()):
    """Ingest externally-produced y-band stray light data files into
    a data repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to write with.  Any collections associated with it are
        ignored in favor of ``collection`` and/or ``labels``.
    directory : `str`
        Directory containing yBackground-*.fits files.
    transfer : `str`, optional
        If not `None`, must be one of 'move', 'copy', 'hardlink', or
        'symlink', indicating how to transfer the files.
    collection : `str`, optional
        Name to use for the calibration collection that associates all
        datasets with a validity range.  If this collection already
        exists, it must be a `~CollectionType.CALIBRATION` collection,
        and it must not have any datasets that would conflict with those
        inserted by this method.  If `None`, a collection name is worked
        out automatically from the instrument name and other metadata by
        calling ``makeCuratedCalibrationCollectionName``, but this
        default name may not work well for long-lived repositories
        unless ``labels`` is also provided (and changed every time
        curated calibrations are ingested).
    labels : `Sequence` [ `str` ], optional
        Extra strings to include in collection names, after
        concatenating them with the standard collection name delimiter.
        If provided, these are inserted into the names of the
        `~CollectionType.RUN` collections that datasets are inserted
        directly into, as well as the `~CollectionType.CALIBRATION`
        collection if it is generated automatically (i.e. if
        ``collection is None``).  Usually this is just the name of the
        ticket on which the calibration collection is being created.
    """
    # Register the CALIBRATION collection that adds validity ranges.
    # This does nothing if it is already registered.
    if collection is None:
        collection = self.makeCalibrationCollectionName(*labels)
    butler.registry.registerCollection(collection, type=CollectionType.CALIBRATION)

    # Register the RUN collection that holds these datasets directly.  We
    # only need one because there is only one validity range and hence no
    # data ID conflicts even when there are no validity ranges.
    run = self.makeUnboundedCalibrationRunName(*labels)
    butler.registry.registerRun(run)

    # The LEDs were covered up around 2018-01-01, so no correction is
    # needed after that date.
    timespan = Timespan(begin=None,
                        end=astropy.time.Time("2018-01-01", format="iso", scale="tai"))

    datasets = []
    # TODO: should we use a more generic name for the dataset type?
    # This is just the (rather HSC-specific) name used in Gen2, and while
    # the instances of this dataset are camera-specific, the datasetType
    # (which is used in the generic IsrTask) should not be.
    datasetType = DatasetType("yBackground",
                              dimensions=("physical_filter", "detector"),
                              storageClass="StrayLightData",
                              universe=butler.registry.dimensions,
                              isCalibration=True)
    for detector in self.getCamera():
        path = os.path.join(directory, f"ybackground-{detector.getId():03d}.fits")
        if not os.path.exists(path):
            log.warning(
                f"No stray light data found for detector {detector.getId()} @ {path}."
            )
            continue
        ref = DatasetRef(datasetType, dataId={
            "instrument": self.getName(),
            "detector": detector.getId(),
            "physical_filter": "HSC-Y",
        })
        datasets.append(FileDataset(refs=ref, path=path,
                                    formatter=SubaruStrayLightDataFormatter))
    butler.registry.registerDatasetType(datasetType)
    with butler.transaction():
        butler.ingest(*datasets, transfer=transfer, run=run)
        refs = []
        for dataset in datasets:
            refs.extend(dataset.refs)
        butler.registry.certify(collection, refs, timespan)
def writeAdditionalCuratedCalibrations(self, butler, collection=None, labels=()):
    # Register the CALIBRATION collection that adds validity ranges.
    # This does nothing if it is already registered.
    if collection is None:
        collection = self.makeCalibrationCollectionName(*labels)
    butler.registry.registerCollection(collection, type=CollectionType.CALIBRATION)

    # Register the RUN collection that holds these datasets directly.  We
    # only need one because all of these datasets have the same (unbounded)
    # validity range right now.
    run = self.makeUnboundedCalibrationRunName(*labels)
    butler.registry.registerRun(run)
    baseDataId = butler.registry.expandDataId(instrument=self.getName())
    refs = []

    # Write brighter-fatter kernel, with an infinite validity range.
    datasetType = DatasetType("bfKernel", ("instrument",), "NumpyArray",
                              universe=butler.registry.dimensions,
                              isCalibration=True)
    butler.registry.registerDatasetType(datasetType)

    # Load and then put instead of just moving the file, in part to ensure
    # the version in-repo is written with Python 3 and does not need
    # `encoding='latin1'` to be read.
    bfKernel = self.getBrighterFatterKernel()
    refs.append(butler.put(bfKernel, datasetType, baseDataId, run=run))

    # The loops below iterate over the values of the dictionaries returned
    # by the transmission functions and ignore the date that is supplied.
    # This is because the dates are single dates rather than ranges, which
    # do not give a proper notion of validity; unbounded validity ranges
    # are therefore used when inserting into the database.  In the future
    # these could, and probably should, be updated to properly account for
    # which ranges are considered valid.

    # Write optical transmissions.
    opticsTransmissions = getOpticsTransmission()
    datasetType = DatasetType("transmission_optics", ("instrument",),
                              "TransmissionCurve",
                              universe=butler.registry.dimensions,
                              isCalibration=True)
    butler.registry.registerDatasetType(datasetType)
    for entry in opticsTransmissions.values():
        if entry is None:
            continue
        refs.append(butler.put(entry, datasetType, baseDataId, run=run))

    # Write sensor transmissions.
    sensorTransmissions = getSensorTransmission()
    datasetType = DatasetType("transmission_sensor", ("instrument", "detector"),
                              "TransmissionCurve",
                              universe=butler.registry.dimensions,
                              isCalibration=True)
    butler.registry.registerDatasetType(datasetType)
    for entry in sensorTransmissions.values():
        if entry is None:
            continue
        for sensor, curve in entry.items():
            dataId = DataCoordinate.standardize(baseDataId, detector=sensor)
            refs.append(butler.put(curve, datasetType, dataId, run=run))

    # Write filter transmissions.
    filterTransmissions = getFilterTransmission()
    datasetType = DatasetType("transmission_filter", ("instrument", "physical_filter"),
                              "TransmissionCurve",
                              universe=butler.registry.dimensions,
                              isCalibration=True)
    butler.registry.registerDatasetType(datasetType)
    for entry in filterTransmissions.values():
        if entry is None:
            continue
        for band, curve in entry.items():
            dataId = DataCoordinate.standardize(baseDataId, physical_filter=band)
            refs.append(butler.put(curve, datasetType, dataId, run=run))

    # Write atmospheric transmissions.
    atmosphericTransmissions = getAtmosphereTransmission()
    datasetType = DatasetType("transmission_atmosphere", ("instrument",),
                              "TransmissionCurve",
                              universe=butler.registry.dimensions,
                              isCalibration=True)
    butler.registry.registerDatasetType(datasetType)
    for entry in atmosphericTransmissions.values():
        if entry is None:
            continue
        refs.append(butler.put(entry, datasetType, {"instrument": self.getName()}, run=run))

    # Associate all datasets with the unbounded validity range.
    butler.registry.certify(collection, refs, Timespan(begin=None, end=None))
def testEmpty(self):
    """Test various ways to construct an empty timespan, and that
    operations on empty timespans yield the expected behavior.
    """
    self.assertEqual(
        Timespan.makeEmpty(),
        Timespan(Timespan.EMPTY, Timespan.EMPTY),
    )
    self.assertEqual(
        Timespan.makeEmpty(),
        Timespan(self.timestamps[1], self.timestamps[0]),
    )
    self.assertEqual(
        Timespan.makeEmpty(),
        Timespan(Timespan.EMPTY, self.timestamps[0]),
    )
    self.assertEqual(
        Timespan.makeEmpty(),
        Timespan(self.timestamps[0], Timespan.EMPTY),
    )
    self.assertEqual(
        Timespan.makeEmpty(),
        Timespan(self.timestamps[0], self.timestamps[0], padInstantaneous=False)
    )
    empty = Timespan.makeEmpty()
    for t in self.timestamps:
        with self.subTest(t=str(t)):
            self.assertFalse(empty < t)
            self.assertFalse(empty > t)
            self.assertFalse(t < empty)
            self.assertFalse(t > empty)
            self.assertFalse(empty.contains(t))
    for t in self.timespans:
        with self.subTest(t=str(t)):
            self.assertTrue(t.contains(empty))
            self.assertFalse(t.overlaps(empty))
            self.assertFalse(empty.overlaps(t))
            self.assertEqual(empty.contains(t), t.isEmpty())
            self.assertFalse(empty < t)
            self.assertFalse(t < empty)
            self.assertFalse(empty > t)
            self.assertFalse(t > empty)
def _finish(self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]]):
    # Docstring inherited from RepoConverter.
    # Read the Gen2 calibration repository and extract validity ranges for
    # all datasetType + calibDate combinations we ingested.
    calibFile = os.path.join(self.root, "calibRegistry.sqlite3")

    # If the registry file does not exist this indicates a problem.
    # We check explicitly because sqlite will try to create the
    # missing file if it can.
    if not os.path.exists(calibFile):
        raise RuntimeError("Attempting to convert calibrations but no registry database"
                           f" found in {self.root}")

    # Initially we collate timespans for each dataId + dataset type
    # combination.  This allows us to check for the small gaps or overlaps
    # inherent in the ambiguous use of validity ranges in Gen2.
    timespansByDataId = defaultdict(list)

    db = sqlite3.connect(calibFile)
    db.row_factory = sqlite3.Row

    for datasetType, datasetsByCalibDate in datasets.items():
        if not datasetType.isCalibration():
            continue
        gen2keys = {}
        if "detector" in datasetType.dimensions.names:
            gen2keys[self.task.config.ccdKey] = int
        if "physical_filter" in datasetType.dimensions.names:
            gen2keys["filter"] = str
        translator = self.instrument.makeDataIdTranslatorFactory().makeMatching(
            datasetType.name,
            gen2keys,
            instrument=self.instrument.getName()
        )
        for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
            assert calibDate is not None, ("datasetType.isCalibration() is set by "
                                           "the presence of calibDate in the Gen2 template")
            # Build a mapping that lets us find DatasetRefs by data ID,
            # for this DatasetType and calibDate.  We know there is only
            # one ref for each data ID (given DatasetType and calibDate as
            # well).
            refsByDataId = {}
            for dataset in datasetsForCalibDate:
                refsByDataId.update((ref.dataId, ref) for ref in dataset.refs)
            # Query the Gen2 calibration repo for the validity ranges for
            # this DatasetType and calibDate, and look up the appropriate
            # refs by data ID.
            for row in self._queryGen2CalibRegistry(db, datasetType, calibDate):
                # For validity times we use TAI as some Gen2 repos have
                # validity dates very far in the past or future.
                timespan = Timespan(
                    astropy.time.Time(row["validStart"], format="iso", scale="tai"),
                    astropy.time.Time(row["validEnd"], format="iso", scale="tai"),
                )
                # Make a Gen2 data ID from query results.
                gen2id = {}
                if "detector" in datasetType.dimensions.names:
                    gen2id[self.task.config.ccdKey] = row[self.task.config.ccdKey]
                if "physical_filter" in datasetType.dimensions.names:
                    gen2id["filter"] = row["filter"]
                # Translate that to Gen3.
                gen3id, _ = translator(gen2id)
                dataId = DataCoordinate.standardize(gen3id, graph=datasetType.dimensions)
                ref = refsByDataId.get(dataId)
                if ref is not None:
                    # Validity ranges must not overlap for the same dataId
                    # + datasetType combination.  Use that as a primary
                    # key and store the timespan and ref in a tuple as the
                    # value for later timespan validation.
                    timespansByDataId[(ref.dataId, ref.datasetType.name)].append((timespan, ref))
                else:
                    # The Gen2 calib registry mentions this dataset, but it
                    # isn't included in what we've ingested.  This might
                    # sometimes be a problem, but it should usually
                    # represent someone just trying to convert a subset of
                    # the Gen2 repo, so I don't think it's appropriate to
                    # warn or even log at info, since in that case there
                    # may be a _lot_ of these messages.
                    self.task.log.debug(
                        "Gen2 calibration registry entry has no dataset: %s for calibDate=%s, %s.",
                        datasetType.name, calibDate, dataId
                    )

    # Analyze the timespans to check for overlap problems.  Gaps of a day
    # should be closed since we assume differing conventions in Gen2 repos.

    # We need to correct any validity range issues and store the results
    # in a dict-of-lists keyed by Timespan, since Registry.certify
    # operates on one Timespan and multiple refs at a time.
    refsByTimespan = defaultdict(list)

    # A day with a bit of fuzz to indicate the largest gap we will close.
    max_gap = astropy.time.TimeDelta(1.001, format="jd", scale="tai")

    # Since in many cases the validity ranges are relevant for multiple
    # dataset types and dataIds, we don't want to over-report, and so
    # cache the messages for later.
    info_messages = set()
    warn_messages = set()
    for timespans in timespansByDataId.values():
        # Sort all the timespans and check overlaps.
        sorted_timespans = sorted(timespans, key=lambda x: x[0])
        timespan_prev, ref_prev = sorted_timespans.pop(0)
        for timespan, ref in sorted_timespans:
            # See if we have a suspicious gap.
            delta = timespan.begin - timespan_prev.end
            abs_delta = abs(delta)
            if abs_delta > 0 and abs_delta < max_gap:
                if delta > 0:
                    # Gap between timespans.
                    msg = f"Calibration validity gap closed from {timespan_prev.end}" \
                          f" to {timespan.begin}"
                    info_messages.add(msg)
                else:
                    # Overlap of timespans.
                    msg = f"Calibration validity overlap of {abs(delta).to(u.s)} removed" \
                          f" for period {timespan.begin} to {timespan_prev.end}"
                    warn_messages.add(msg)
                self.task.log.debug("Correcting validity range for %s with end %s",
                                    ref_prev, timespan_prev.end)
                # Assume this gap is down to convention in Gen2.  We have
                # to adjust the previous timespan to fit since we always
                # trust validStart.
                timespan_prev = Timespan(begin=timespan_prev.begin, end=timespan.begin)
            # Store the previous timespan and ref since it has now been
            # verified.
            refsByTimespan[timespan_prev].append(ref_prev)
            # And update the previous values for the next iteration.
            timespan_prev = timespan
            ref_prev = ref
        # Store the final timespan/ref pair.
        refsByTimespan[timespan_prev].append(ref_prev)

    # Issue any pending log messages we have recorded.
    for msg in sorted(info_messages):
        self.task.log.info(msg)
    for msg in sorted(warn_messages):
        self.task.log.warn(msg)

    # Done reading from Gen2; time to certify into Gen3.
    for timespan, refs in refsByTimespan.items():
        self.task.registry.certify(self.collection, refs, timespan)
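# A worked example of the gap-closing logic above: given validity ranges
# [2020-01-01, 2020-01-02) and [2020-01-03, ...) for the same data ID and
# dataset type, the one-day gap is positive and below ``max_gap`` (1.001 d),
# so the first range is stretched to end at 2020-01-03 before certification
# (reported at info level).  An overlap of similar size is trimmed the same
# way, because validStart is always trusted, but reported as a warning.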