def registerDatasetTypes(registry, pipeline):
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `typing.Iterable` of `TaskDef`
        Iterable of TaskDef instances, likely the output of the method
        toExpandedPipeline on a `~lsst.pipe.base.Pipeline` object.
    """
    for taskDef in pipeline:
        configDatasetType = DatasetType(taskDef.configDatasetName, {},
                                        storageClass="Config",
                                        universe=registry.dimensions)
        packagesDatasetType = DatasetType("packages", {},
                                          storageClass="Packages",
                                          universe=registry.dimensions)
        datasetTypes = pipeBase.TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
        for datasetType in itertools.chain(datasetTypes.initInputs, datasetTypes.initOutputs,
                                           datasetTypes.inputs, datasetTypes.outputs,
                                           datasetTypes.prerequisites,
                                           [configDatasetType, packagesDatasetType]):
            _LOG.info("Registering %s with registry", datasetType)
            # This is a no-op if it already exists and is consistent,
            # and it raises if it is inconsistent. But components must be
            # skipped.
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)
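# A hedged usage sketch for the helper above. The repository path and the
# pipeline definition file are illustrative assumptions, not taken from the
# original source.
from lsst.daf.butler import Butler
from lsst.pipe.base import Pipeline

exampleButler = Butler("repo", writeable=True)
examplePipeline = Pipeline.fromFile("example_pipeline.yaml")
registerDatasetTypes(exampleButler.registry, examplePipeline.toExpandedPipeline())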
def testConstructor(self):
    """Test construction preserves values.

    Note that construction doesn't check for valid storageClass.
    This can only be verified for a particular schema.
    """
    datasetTypeName = "test"
    storageClass = StorageClass("test_StructuredData")
    dimensions = self.universe.extract(("instrument", "visit"))
    datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    self.assertEqual(datasetType.name, datasetTypeName)
    self.assertEqual(datasetType.storageClass, storageClass)
    self.assertEqual(datasetType.dimensions, dimensions)

    with self.assertRaises(ValueError,
                           msg="Construct component without parent storage class"):
        DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                    dimensions, storageClass)
    with self.assertRaises(ValueError,
                           msg="Construct non-component with parent storage class"):
        DatasetType(datasetTypeName, dimensions, storageClass,
                    parentStorageClass="NotAllowed")
def registerDatasetTypes(datasetTypeName, dimensions, storageClass, registry):
    """Bulk register DatasetTypes."""
    datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    registry.registerDatasetType(datasetType)
    for compName, compStorageClass in storageClass.components.items():
        compType = DatasetType(datasetType.componentTypeName(compName),
                               dimensions, compStorageClass)
        registry.registerDatasetType(compType)
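# A minimal sketch showing how the bulk-registration helper above might be
# called. The storage class, its components, and the repository path are
# assumptions made for illustration.
from lsst.daf.butler import Butler, StorageClass

exampleStorageClass = StorageClass("ExampleComposite",
                                   components={"image": StorageClass("ExampleImage"),
                                               "mask": StorageClass("ExampleMask")})
exampleButler = Butler("repo", writeable=True)
registerDatasetTypes("example_dataset",
                     exampleButler.registry.dimensions.extract(("instrument", "visit")),
                     exampleStorageClass,
                     exampleButler.registry)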
def setUp(self):
    """Create a new butler root for each test."""
    self.root = makeTestTempDir(TESTDIR)
    Butler.makeRepo(self.root)
    self.butler = Butler(self.root, run="test_run")
    # No dimensions in dataset type so we don't have to worry about
    # inserting dimension data or defining data IDs.
    self.datasetType = DatasetType("data", dimensions=(),
                                   storageClass="DataFrame",
                                   universe=self.butler.registry.dimensions)
    self.butler.registry.registerDatasetType(self.datasetType)
def setUp(self):
    self.universe = DimensionUniverse()
    datasetTypeName = "test"
    self.componentStorageClass1 = StorageClass("Component1")
    self.componentStorageClass2 = StorageClass("Component2")
    self.parentStorageClass = StorageClass("Parent",
                                           components={"a": self.componentStorageClass1,
                                                       "b": self.componentStorageClass2})
    dimensions = self.universe.extract(("instrument", "visit"))
    self.dataId = dict(instrument="DummyCam", visit=42)
    self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)
def testDatasetLocations(self):
    registry = self.makeRegistry()
    storageClass = StorageClass("testStorageInfo")
    registry.storageClasses.registerStorageClass(storageClass)
    datasetType = DatasetType(name="test",
                              dimensions=registry.dimensions.extract(("instrument",)),
                              storageClass=storageClass)
    datasetType2 = DatasetType(name="test2",
                               dimensions=registry.dimensions.extract(("instrument",)),
                               storageClass=storageClass)
    registry.registerDatasetType(datasetType)
    registry.registerDatasetType(datasetType2)
    if not registry.limited:
        registry.addDimensionEntry("instrument", {"instrument": "DummyCam"})
    run = registry.makeRun(collection="test")
    ref = registry.addDataset(datasetType, dataId={"instrument": "DummyCam"}, run=run)
    ref2 = registry.addDataset(datasetType2, dataId={"instrument": "DummyCam"}, run=run)
    datastoreName = "dummystore"
    datastoreName2 = "dummystore2"
    # Test adding information about a new dataset.
    registry.addDatasetLocation(ref, datastoreName)
    addresses = registry.getDatasetLocations(ref)
    self.assertIn(datastoreName, addresses)
    self.assertEqual(len(addresses), 1)
    registry.addDatasetLocation(ref, datastoreName2)
    registry.addDatasetLocation(ref2, datastoreName2)
    addresses = registry.getDatasetLocations(ref)
    self.assertEqual(len(addresses), 2)
    self.assertIn(datastoreName, addresses)
    self.assertIn(datastoreName2, addresses)
    registry.removeDatasetLocation(datastoreName, ref)
    addresses = registry.getDatasetLocations(ref)
    self.assertEqual(len(addresses), 1)
    self.assertNotIn(datastoreName, addresses)
    self.assertIn(datastoreName2, addresses)
    with self.assertRaises(OrphanedRecordError):
        registry.removeDataset(ref)
    registry.removeDatasetLocation(datastoreName2, ref)
    addresses = registry.getDatasetLocations(ref)
    self.assertEqual(len(addresses), 0)
    self.assertNotIn(datastoreName2, addresses)
    registry.removeDataset(ref)  # should not raise
    addresses = registry.getDatasetLocations(ref2)
    self.assertEqual(len(addresses), 1)
    self.assertIn(datastoreName2, addresses)
def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
    # Docstring inherited from DatasetRecordStorageManager.
    if datasetType.isComponent():
        raise ValueError("Component dataset types can not be stored in registry."
                         f" Rejecting {datasetType.name}")
    storage = self._byName.get(datasetType.name)
    if storage is None:
        dimensionsKey = self._dimensions.saveDimensionGraph(datasetType.dimensions)
        tagTableName = makeTagTableName(datasetType, dimensionsKey)
        calibTableName = (makeCalibTableName(datasetType, dimensionsKey)
                          if datasetType.isCalibration() else None)
        row, inserted = self._db.sync(
            self._static.dataset_type,
            keys={"name": datasetType.name},
            compared={
                "dimensions_key": dimensionsKey,
                "storage_class": datasetType.storageClass.name,
            },
            extra={
                "tag_association_table": tagTableName,
                "calibration_association_table": calibTableName,
            },
            returning=["id", "tag_association_table"],
        )
        assert row is not None
        tags = self._db.ensureTableExists(
            tagTableName,
            makeTagTableSpec(datasetType, type(self._collections)),
        )
        if calibTableName is not None:
            calibs = self._db.ensureTableExists(
                calibTableName,
                makeCalibTableSpec(datasetType, type(self._collections),
                                   self._db.getTimespanRepresentation()),
            )
        else:
            calibs = None
        storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                   static=self._static, summaries=self._summaries,
                                                   tags=tags, calibs=calibs,
                                                   dataset_type_id=row["id"],
                                                   collections=self._collections)
        self._byName[datasetType.name] = storage
        self._byId[storage._dataset_type_id] = storage
    else:
        if datasetType != storage.datasetType:
            raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                             f"with database definition {storage.datasetType}.")
        inserted = False
    return storage, inserted
def makeDatasetRef(self, datasetTypeName, dataId=None, storageClassName="DefaultStorageClass",
                   run="run2", conform=True):
    """Make a simple DatasetRef."""
    if dataId is None:
        dataId = self.dataId
    # Pretend we have a parent if this looks like a composite.
    compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
    parentStorageClass = DatasetType.PlaceholderParentStorageClass if componentName else None
    datasetType = DatasetType(datasetTypeName,
                              DimensionGraph(self.universe, names=dataId.keys()),
                              StorageClass(storageClassName),
                              parentStorageClass=parentStorageClass)
    return DatasetRef(datasetType, dataId, id=1, run=run, conform=conform)
def testComponents(self):
    registry = self.makeRegistry()
    childStorageClass = StorageClass("testComponentsChild")
    registry.storageClasses.registerStorageClass(childStorageClass)
    parentStorageClass = StorageClass("testComponentsParent",
                                      components={"child1": childStorageClass,
                                                  "child2": childStorageClass})
    registry.storageClasses.registerStorageClass(parentStorageClass)
    parentDatasetType = DatasetType(name="parent",
                                    dimensions=registry.dimensions.extract(("instrument",)),
                                    storageClass=parentStorageClass)
    childDatasetType1 = DatasetType(name="parent.child1",
                                    dimensions=registry.dimensions.extract(("instrument",)),
                                    storageClass=childStorageClass)
    childDatasetType2 = DatasetType(name="parent.child2",
                                    dimensions=registry.dimensions.extract(("instrument",)),
                                    storageClass=childStorageClass)
    registry.registerDatasetType(parentDatasetType)
    registry.registerDatasetType(childDatasetType1)
    registry.registerDatasetType(childDatasetType2)
    dataId = {"instrument": "DummyCam"}
    if not registry.limited:
        registry.addDimensionEntry("instrument", dataId)
    run = registry.makeRun(collection="test")
    parent = registry.addDataset(parentDatasetType, dataId=dataId, run=run)
    children = {"child1": registry.addDataset(childDatasetType1, dataId=dataId, run=run),
                "child2": registry.addDataset(childDatasetType2, dataId=dataId, run=run)}
    for name, child in children.items():
        registry.attachComponent(name, parent, child)
    self.assertEqual(parent.components, children)
    outParent = registry.getDataset(parent.id)
    self.assertEqual(outParent.components, children)
    # Remove the parent; this should remove both children.
    registry.removeDataset(parent)
    self.assertIsNone(registry.find(run.collection, parentDatasetType, dataId))
    self.assertIsNone(registry.find(run.collection, childDatasetType1, dataId))
    self.assertIsNone(registry.find(run.collection, childDatasetType2, dataId))
def testSorting(self):
    """Can we sort a DatasetType?"""
    storage = StorageClass("test_a")
    dimensions = self.universe.extract(["instrument"])
    d_a = DatasetType("a", dimensions, storage)
    d_f = DatasetType("f", dimensions, storage)
    d_p = DatasetType("p", dimensions, storage)

    sort = sorted([d_p, d_f, d_a])
    self.assertEqual(sort, [d_a, d_f, d_p])

    # Now with strings.
    with self.assertRaises(TypeError):
        sort = sorted(["z", d_p, "c", d_f, d_a, "d"])
def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
    # Docstring inherited from DatasetRecordStorageManager.
    if datasetType.isComponent():
        raise ValueError("Component dataset types can not be stored in registry."
                         f" Rejecting {datasetType.name}")
    storage = self._byName.get(datasetType.name)
    if storage is None:
        row, inserted = self._db.sync(
            self._static.dataset_type,
            keys={"name": datasetType.name},
            compared={
                "dimensions_encoded": datasetType.dimensions.encode(),
                "storage_class": datasetType.storageClass.name,
            },
            returning=["id"],
        )
        assert row is not None
        dynamic = self._db.ensureTableExists(
            makeDynamicTableName(datasetType),
            makeDynamicTableSpec(datasetType, type(self._collections)),
        )
        storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                   static=self._static, dynamic=dynamic,
                                                   dataset_type_id=row["id"],
                                                   collections=self._collections)
        self._byName[datasetType.name] = storage
        self._byId[storage._dataset_type_id] = storage
    else:
        if datasetType != storage.datasetType:
            raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                             f"with database definition {storage.datasetType}.")
        inserted = False
    return storage, inserted
def run(self, butler):
    """Construct and optionally save a SkyMap into a gen3 repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler repository to which the new skymap will be written.
    """
    skyMap = self.config.skyMap.apply()
    skyMap.logSkyMapInfo(self.log)
    skyMapHash = skyMap.getSha1()
    try:
        existing, = butler.registry.query("SELECT skymap FROM skymap WHERE hash=:hash",
                                          hash=skyMapHash)
        raise RuntimeError(
            (f"SkyMap with name {existing.name} and hash {skyMapHash} already exists in "
             f"the butler collection {self.collection}; SkyMaps must be unique within "
             "a collection")
        )
    except ValueError:
        self.log.info(f"Inserting SkyMap {self.config.name} with hash={skyMapHash}")
        with butler.registry.transaction():
            skyMap.register(self.config.name, butler.registry)
            butler.registry.registerDatasetType(DatasetType(name=self.config.datasetTypeName,
                                                            dimensions=["skymap"],
                                                            storageClass="SkyMap",
                                                            universe=butler.registry.dimensions))
            butler.put(skyMap, self.config.datasetTypeName, {"skymap": self.config.name})
    return pipeBase.Struct(
        skyMap=skyMap
    )
def assertGetComponents(self, butler, datasetTypeName, dataId, components, reference):
    for component in components:
        compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
        result = butler.get(compTypeName, dataId)
        self.assertEqual(result, getattr(reference, component))
def refresh(self) -> None:
    # Docstring inherited from DatasetRecordStorageManager.
    byName = {}
    byId = {}
    c = self._static.dataset_type.columns
    for row in self._db.query(self._static.dataset_type.select()).fetchall():
        name = row[c.name]
        dimensions = self._dimensions.loadDimensionGraph(row[c.dimensions_key])
        calibTableName = row[c.calibration_association_table]
        datasetType = DatasetType(name, dimensions, row[c.storage_class],
                                  isCalibration=(calibTableName is not None))
        tags = self._db.getExistingTable(row[c.tag_association_table],
                                         makeTagTableSpec(datasetType, type(self._collections)))
        if calibTableName is not None:
            calibs = self._db.getExistingTable(row[c.calibration_association_table],
                                               makeCalibTableSpec(datasetType, type(self._collections),
                                                                  self._db.getTimespanRepresentation()))
        else:
            calibs = None
        storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                   static=self._static, summaries=self._summaries,
                                                   tags=tags, calibs=calibs,
                                                   dataset_type_id=row["id"],
                                                   collections=self._collections)
        byName[datasetType.name] = storage
        byId[storage._dataset_type_id] = storage
    self._byName = byName
    self._byId = byId
    self._summaries.refresh(lambda dataset_type_id: self._byId[dataset_type_id].datasetType)
def testRegistryWithStorageClass(self):
    """Test that the registry can be given a StorageClass object."""
    formatterTypeName = "lsst.daf.butler.formatters.yamlFormatter.YamlFormatter"
    storageClassName = "TestClass"
    sc = StorageClass(storageClassName, dict, None)

    universe = DimensionUniverse.fromConfig()
    datasetType = DatasetType("calexp", universe.extract([]), sc)

    # Store using an instance.
    self.factory.registerFormatter(sc, formatterTypeName)

    # Retrieve using the class.
    f = self.factory.getFormatter(sc, self.fileDescriptor)
    self.assertIsFormatter(f)
    self.assertEqual(f.fileDescriptor, self.fileDescriptor)

    # Retrieve using the DatasetType.
    f2 = self.factory.getFormatter(datasetType, self.fileDescriptor)
    self.assertIsFormatter(f2)
    self.assertEqual(f.name(), f2.name())

    # Class directly.
    f2cls = self.factory.getFormatterClass(datasetType)
    self.assertIsFormatter(f2cls)

    # This might defer the import; pytest may have already loaded it.
    from lsst.daf.butler.formatters.yamlFormatter import YamlFormatter
    self.assertEqual(type(f), YamlFormatter)

    with self.assertRaises(KeyError):
        # Attempt to overwrite using a different value.
        self.factory.registerFormatter(storageClassName,
                                       "lsst.daf.butler.formatters.jsonFormatter.JsonFormatter")
def __init__(self, *, datasetTypeName: str, template: str, keys: Dict[str, type],
             storageClass: StorageClass, universe: DimensionUniverse,
             formatter: FormatterParameter, translatorFactory: TranslatorFactory,
             targetHandler: Optional[PathElementHandler] = None,
             **kwargs: Any):
    # Strip off [%HDU] identifiers from e.g. DECam Community Pipeline
    # products.
    template = template.split('[%(')[0]
    super().__init__(template=template, keys=keys)
    self._translator = translatorFactory.makeMatching(datasetTypeName, keys, **kwargs)
    self.datasetType = DatasetType(datasetTypeName,
                                   dimensions=self._translator.dimensionNames,
                                   storageClass=storageClass,
                                   universe=universe,
                                   isCalibration=("calibDate" in keys))
    self._formatter = formatter
    if targetHandler is None:
        targetHandler = TargetFileHandler
    self._handler = targetHandler
def testHealSparseMapFormatter(self):
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("map", [], "HealSparseMap",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    ref = butler.put(self.hspMap, datasetType)
    uri = butler.getURI(ref)
    self.assertEqual(uri.getExtension(), '.hsp')

    # Retrieve the full map.
    hspMap = butler.get('map')
    self.assertTrue(np.all(hspMap._sparse_map == self.hspMap._sparse_map))

    # Retrieve the coverage map.
    coverage = butler.get('map.coverage')
    self.assertTrue(np.all(coverage.coverage_mask == self.hspMap.coverage_mask))

    # Retrieve a partial map.
    pixels = [0, 6]
    partialMap = butler.get('map', parameters={'pixels': pixels})
    self.assertTrue(np.all(np.where(partialMap.coverage_mask)[0] == np.array(pixels)))
    self.assertTrue(np.all(partialMap[0:10000] == self.hspMap[0:10000]))
    self.assertTrue(np.all(partialMap[100000:110000] == self.hspMap[100000:110000]))

    # Retrieve a degraded map.
    degradedMapRead = butler.get('map', parameters={'degrade_nside': 512})
    degradedMap = self.hspMap.degrade(512)
    self.assertTrue(np.all(degradedMapRead._sparse_map == degradedMap._sparse_map))
def makeTask(self, taskClass, config, overrides, butler):
    """Create new PipelineTask instance from its class.

    Parameters
    ----------
    taskClass : type
        PipelineTask class.
    config : `pex.Config` or None
        Configuration object, if ``None`` then use task-defined
        configuration class to create new instance.
    overrides : `ConfigOverrides` or None
        Configuration overrides, this should contain all overrides to be
        applied to a default task config, including instrument-specific,
        obs-package specific, and possibly command-line overrides.
    butler : `lsst.daf.butler.Butler` or None
        Butler instance used to obtain initialization inputs for
        PipelineTasks. If None, some PipelineTasks will not be usable.

    Returns
    -------
    Instance of a PipelineTask class or None on errors.

    Raises
    ------
    Any exceptions that are raised by PipelineTask constructor or its
    configuration class are propagated back to caller.
    """
    # Configuration.
    if config is None:
        config = taskClass.ConfigClass()
        if overrides:
            overrides.applyTo(config)
    elif overrides is not None:
        _LOG.warning("Both config and overrides are specified for task %s, overrides are ignored",
                     taskClass.__name__)

    # If we don't have a butler, try to construct without initInputs;
    # let PipelineTasks raise if that's impossible.
    if butler is None:
        initInputs = None
    else:
        connections = config.connections.ConnectionsClass(config=config)
        descriptorMap = {}
        for name in connections.initInputs:
            attribute = getattr(connections, name)
            dsType = DatasetType(attribute.name,
                                 butler.registry.dimensions.extract(set()),
                                 attribute.storageClass)
            descriptorMap[name] = dsType
        initInputs = {k: butler.get(v) for k, v in descriptorMap.items()}

    # Freeze the config.
    config.freeze()

    # Make task instance.
    task = taskClass(config=config, initInputs=initInputs)
    return task
def getInitInputs(butler: Butler, config: PipelineTaskConfig) -> Dict[str, Any]:
    """Return the initInputs object that would have been passed to a
    `~lsst.pipe.base.PipelineTask` constructor.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to search for input datasets. Must have
        pre-configured collections.
    config : `lsst.pipe.base.PipelineTaskConfig`
        The config for the task to be constructed.

    Returns
    -------
    initInputs : `dict` [`str`]
        A dictionary of objects in the format of the ``initInputs``
        parameter to `lsst.pipe.base.PipelineTask`.
    """
    connections = config.connections.ConnectionsClass(config=config)
    initInputs = {}
    for name in connections.initInputs:
        attribute = getattr(connections, name)
        # Get full dataset type to check for consistency problems.
        dsType = DatasetType(attribute.name,
                             butler.registry.dimensions.extract(set()),
                             attribute.storageClass)
        # All initInputs have empty data IDs.
        initInputs[name] = butler.get(dsType)

    return initInputs
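# A hedged sketch of calling getInitInputs. The repository path, collection
# name, config class, and task class are hypothetical placeholders, not part
# of the original code.
from lsst.daf.butler import Butler

exampleButler = Butler("repo", collections=["example/run"])
exampleConfig = ExamplePipelineTaskConfig()  # hypothetical PipelineTaskConfig subclass
initInputs = getInitInputs(exampleButler, exampleConfig)
task = ExamplePipelineTask(config=exampleConfig, initInputs=initInputs)  # hypothetical task class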
def testMatplotlibFormatter(self):
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Does not have to be a random image.
    pyplot.imshow([self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   ])
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)

    # The test after this will not work if we don't have a local file.
    self.assertEqual(uri.scheme, "file", f"Testing returned URI: {uri}")
    with tempfile.NamedTemporaryFile(suffix=".png") as file:
        pyplot.gcf().savefig(file.name)
        self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)
def put_values(repo, visit, detector, instrument, out_collection,
               ra=None, dec=None, size=None, filename=None):
    butler = Butler(repo, writeable=True, run=out_collection)
    # This doesn't strictly need to be done every time, but doesn't seem
    # to hurt if the dataset type already exists.
    position_dataset_type = DatasetType('cutout_positions',
                                        dimensions=['visit', 'detector', 'instrument'],
                                        universe=butler.registry.dimensions,
                                        storageClass='AstropyQTable')
    butler.registry.registerDatasetType(position_dataset_type)
    if filename:
        poslist = numpy.genfromtxt(filename, dtype=None, delimiter=',')
    else:
        poslist = [(ra, dec, size), ]
    ident = []
    pos = []
    size = []
    for i, rec in enumerate(poslist):
        pt = SkyCoord(rec[0], rec[1], frame='icrs', unit=u.deg)
        pos.append(pt)
        ident.append(i*u.dimensionless_unscaled)
        size.append(float(rec[2])*u.dimensionless_unscaled)
    out_table = QTable([ident, pos, size], names=['id', 'position', 'size'])
    butler.put(out_table, 'cutout_positions', visit=visit, detector=detector,
               instrument=instrument)
def run(self, butler):
    """Construct and optionally save a SkyMap into a gen3 repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler repository to which the new skymap will be written.
    """
    skyMap = self.config.skyMap.apply()
    skyMap.logSkyMapInfo(self.log)
    skyMapHash = skyMap.getSha1()
    self.log.info(f"Inserting SkyMap {self.config.name} with hash={skyMapHash}")
    with butler.registry.transaction():
        try:
            skyMap.register(self.config.name, butler.registry)
        except IntegrityError as err:
            raise RuntimeError("A skymap with the same name or hash already exists.") from err
        butler.registry.registerDatasetType(DatasetType(name=self.config.datasetTypeName,
                                                        dimensions=["skymap"],
                                                        storageClass="SkyMap",
                                                        universe=butler.registry.dimensions))
        butler.put(skyMap, self.config.datasetTypeName, {"skymap": self.config.name})
    return pipeBase.Struct(skyMap=skyMap)
def refresh(self, *, universe: DimensionUniverse) -> None:
    # Docstring inherited from DatasetRecordStorageManager.
    byName = {}
    byId = {}
    c = self._static.dataset_type.columns
    for row in self._db.query(self._static.dataset_type.select()).fetchall():
        name = row[c.name]
        dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
        datasetType = DatasetType(name, dimensions, row[c.storage_class])
        dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                            makeDynamicTableSpec(datasetType, type(self._collections)))
        storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                   static=self._static, dynamic=dynamic,
                                                   dataset_type_id=row["id"],
                                                   collections=self._collections)
        byName[datasetType.name] = storage
        byId[storage._dataset_type_id] = storage
    self._byName = byName
    self._byId = byId
def testCollections(self):
    registry = self.makeRegistry()
    storageClass = StorageClass("testCollections")
    registry.storageClasses.registerStorageClass(storageClass)
    datasetType = DatasetType(name="dummytype",
                              dimensions=registry.dimensions.extract(("instrument", "visit")),
                              storageClass=storageClass)
    registry.registerDatasetType(datasetType)
    if not registry.limited:
        registry.addDimensionEntry("instrument", {"instrument": "DummyCam"})
        registry.addDimensionEntry("physical_filter",
                                   {"instrument": "DummyCam", "physical_filter": "d-r"})
        registry.addDimensionEntry("visit",
                                   {"instrument": "DummyCam", "visit": 0, "physical_filter": "d-r"})
        registry.addDimensionEntry("visit",
                                   {"instrument": "DummyCam", "visit": 1, "physical_filter": "d-r"})
    collection = "ingest"
    run = registry.makeRun(collection=collection)
    # Dataset.physical_filter should be populated as well here from the
    # visit Dimension values, if the Registry isn't limited.
    dataId1 = {"instrument": "DummyCam", "visit": 0}
    if registry.limited:
        dataId1.update(physical_filter="d-r", abstract_filter=None)
    inputRef1 = registry.addDataset(datasetType, dataId=dataId1, run=run)
    dataId2 = {"instrument": "DummyCam", "visit": 1}
    if registry.limited:
        dataId2.update(physical_filter="d-r", abstract_filter=None)
    inputRef2 = registry.addDataset(datasetType, dataId=dataId2, run=run)
    # We should be able to find both datasets in their Run.collection...
    outputRef = registry.find(run.collection, datasetType, dataId1)
    self.assertEqual(outputRef, inputRef1)
    outputRef = registry.find(run.collection, datasetType, dataId2)
    self.assertEqual(outputRef, inputRef2)
    # ...and with the associated collection...
    newCollection = "something"
    registry.associate(newCollection, [inputRef1, inputRef2])
    outputRef = registry.find(newCollection, datasetType, dataId1)
    self.assertEqual(outputRef, inputRef1)
    outputRef = registry.find(newCollection, datasetType, dataId2)
    self.assertEqual(outputRef, inputRef2)
    # ...but no more after disassociation.
    registry.disassociate(newCollection, [inputRef1, ])
    self.assertIsNone(registry.find(newCollection, datasetType, dataId1))
    outputRef = registry.find(newCollection, datasetType, dataId2)
    self.assertEqual(outputRef, inputRef2)
    collections = registry.getAllCollections()
    self.assertEqual(collections, {"something", "ingest"})
def prep(self):
    # Docstring inherited from RepoConverter.
    self.task.log.info(f"Looking for skymaps in root {self.root}.")
    for coaddName, datasetTypeName in SKYMAP_DATASET_TYPES.items():
        if not self.task.isDatasetTypeIncluded(datasetTypeName):
            continue
        try:
            exists = self.butler2.datasetExists(datasetTypeName)
        except AttributeError:
            # This mapper doesn't even define this dataset type.
            continue
        if not exists:
            continue
        instance = self.butler2.get(datasetTypeName)
        name = self.task.useSkyMap(instance, datasetTypeName)
        datasetType = DatasetType(datasetTypeName, dimensions=["skymap"],
                                  storageClass="SkyMap", universe=self.task.universe)
        dataId = DataCoordinate.standardize(skymap=name, universe=self.task.universe)
        struct = FoundSkyMap(name=name, instance=instance, coaddName=coaddName,
                             ref=DatasetRef(datasetType, dataId),
                             filename=self.butler2.getUri(datasetTypeName))
        self._foundSkyMapsByCoaddName[coaddName] = struct
        self.task.log.info("Found skymap %s in %s in %s.", name, datasetTypeName, self.root)
    super().prep()
def _makeDatasetRef(self, datasetTypeName, dimensions, storageClass, dataId, *,
                    id=None, run=None, conform=True):
    # Helper for makeDatasetRef.

    # Pretend we have a parent if this looks like a composite.
    compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
    parentStorageClass = StorageClass("component") if componentName else None

    datasetType = DatasetType(datasetTypeName, dimensions, storageClass,
                              parentStorageClass=parentStorageClass)
    if id is None:
        self.id += 1
        id = self.id
    if run is None:
        run = "dummy"
    return DatasetRef(datasetType, dataId, id=id, run=run, conform=conform)
def makeDatasetType(self, universe: DimensionUniverse,
                    parentStorageClass: Optional[Union[StorageClass, str]] = None
                    ) -> DatasetType:
    """Construct a true `DatasetType` instance with normalized dimensions.

    Parameters
    ----------
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions to be used to normalize the dimension
        names specified in config.
    parentStorageClass : `lsst.daf.butler.StorageClass` or `str`, optional
        Parent storage class for component datasets; `None` otherwise.

    Returns
    -------
    datasetType : `DatasetType`
        The `DatasetType` defined by this connection.
    """
    return DatasetType(
        self.name,
        universe.extract(self.dimensions),
        self.storageClass,
        isCalibration=self.isCalibration,
        parentStorageClass=parentStorageClass,
    )
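# A hedged sketch of using the connection method above to register the dataset
# type it defines. The connection instance ("exampleConnection") and the
# registry are assumed to exist and are not part of the original code.
from lsst.daf.butler import DimensionUniverse

universe = DimensionUniverse()
datasetType = exampleConnection.makeDatasetType(universe)  # exampleConnection is hypothetical
registry.registerDatasetType(datasetType)  # assumes an existing Registry instance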
def makeDatasetRef(self, datasetTypeName, dataUnits, storageClass, dataId, id=None):
    """Make a DatasetType and wrap it in a DatasetRef for a test."""
    datasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
    if id is None:
        self.id += 1
        id = self.id
    return DatasetRef(datasetType, dataId, id=id)
def testMatplotlibFormatter(self):
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Does not have to be a random image.
    pyplot.imshow([self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   ])
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)

    # Following test needs a local file.
    with uri.as_local() as local:
        with tempfile.NamedTemporaryFile(suffix=".png") as file:
            pyplot.gcf().savefig(file.name)
            self.assertTrue(filecmp.cmp(local.ospath, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)
def make_dataset_type(butler, name, dimensions, storageClass):
    """Create a dataset type in a particular repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    dataset_type : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class are invalid.
    ConflictingDefinitionError
        Raised if another dataset type with the same name already exists.
    """
    dataset_type = DatasetType(name, dimensions, storageClass,
                               universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(dataset_type)
    return dataset_type
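# A hedged usage sketch for make_dataset_type. The repository path, dataset
# type name, dimensions, and storage class below are illustrative assumptions.
from lsst.daf.butler import Butler

exampleButler = Butler("repo", writeable=True)
newType = make_dataset_type(exampleButler, "example_catalog",
                            {"instrument", "visit", "detector"}, "DataFrame")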